forked from Archive/PX4-Autopilot
Merge branch 'master' into seatbelt_multirotor
This commit is contained in:
commit
f8900f002c
|
@ -1,62 +1,26 @@
|
|||
.built
|
||||
.context
|
||||
*.context
|
||||
*.bdat
|
||||
*.pdat
|
||||
.depend
|
||||
.updated
|
||||
.config
|
||||
.config-e
|
||||
.version
|
||||
.project
|
||||
.cproject
|
||||
apps/builtin/builtin_list.h
|
||||
apps/builtin/builtin_proto.h
|
||||
Make.dep
|
||||
*.pyc
|
||||
*.o
|
||||
*.a
|
||||
*.d
|
||||
*~
|
||||
*.dSYM
|
||||
Images/*.bin
|
||||
Images/*.px4
|
||||
nuttx/Make.defs
|
||||
nuttx/setenv.sh
|
||||
nuttx/arch/arm/include/board
|
||||
nuttx/arch/arm/include/chip
|
||||
nuttx/arch/arm/src/board
|
||||
nuttx/arch/arm/src/chip
|
||||
nuttx/include/apps
|
||||
nuttx/include/arch
|
||||
nuttx/include/math.h
|
||||
nuttx/include/nuttx/config.h
|
||||
nuttx/include/nuttx/version.h
|
||||
nuttx/tools/mkconfig
|
||||
nuttx/tools/mkconfig.exe
|
||||
nuttx/tools/mkversion
|
||||
nuttx/tools/mkversion.exe
|
||||
nuttx/nuttx
|
||||
nuttx/System.map
|
||||
nuttx/nuttx.bin
|
||||
nuttx/nuttx.hex
|
||||
.configured
|
||||
.settings
|
||||
Firmware.sublime-workspace
|
||||
.DS_Store
|
||||
cscope.out
|
||||
.configX-e
|
||||
nuttx-export.zip
|
||||
.~lock.*
|
||||
dot.gdbinit
|
||||
mavlink/include/mavlink/v0.9/
|
||||
.*.swp
|
||||
.swp
|
||||
core
|
||||
.gdbinit
|
||||
mkdeps
|
||||
Archives
|
||||
Build
|
||||
!ROMFS/*/*.d
|
||||
!ROMFS/*/*/*.d
|
||||
!ROMFS/*/*/*/*.d
|
||||
*.dSYM
|
||||
*.o
|
||||
*.pyc
|
||||
*~
|
||||
.*.swp
|
||||
.context
|
||||
.cproject
|
||||
.DS_Store
|
||||
.gdbinit
|
||||
.project
|
||||
.settings
|
||||
.swp
|
||||
.~lock.*
|
||||
Archives/*
|
||||
Build/*
|
||||
core
|
||||
cscope.out
|
||||
dot.gdbinit
|
||||
Firmware.sublime-workspace
|
||||
Images/*.bin
|
||||
Images/*.px4
|
||||
mavlink/include/mavlink/v0.9/
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
Helicopter 120 degree Cyclic-Collective-Pitch Mixing (CCPM) for PX4FMU
|
||||
==================================================
|
||||
|
||||
|
||||
Output 0 - Rear Servo Mixer
|
||||
----------------
|
||||
|
||||
Rear Servo = Collective (Thrust - 3) + Elevator (Pitch - 1)
|
||||
|
||||
M: 2
|
||||
O: 10000 10000 0 -10000 10000
|
||||
S: 0 3 10000 10000 0 -10000 10000
|
||||
S: 0 1 10000 10000 0 -10000 10000
|
||||
|
||||
|
||||
Output 1 - Left Servo Mixer
|
||||
-----------------
|
||||
Left Servo = Collective (Thurst - 3) - 0.5 * Elevator (Pitch - 1) + 0.866 * Aileron (Roll - 0)
|
||||
|
||||
M: 3
|
||||
O: 10000 10000 0 -10000 10000
|
||||
S: 0 3 -10000 -10000 0 -10000 10000
|
||||
S: 0 1 -5000 -5000 0 -10000 10000
|
||||
S: 0 0 8660 8660 0 -10000 10000
|
||||
|
||||
|
||||
Output 2 - Right Servo Mixer
|
||||
----------------
|
||||
Right Servo = Collective (Thurst - 3) - 0.5 * Elevator (Pitch - 1) - 0.866 * Aileron (Roll - 0)
|
||||
|
||||
|
||||
M: 3
|
||||
O: 10000 10000 0 -10000 10000
|
||||
S: 0 3 -10000 -10000 0 -10000 10000
|
||||
S: 0 1 -5000 -5000 0 -10000 10000
|
||||
S: 0 0 -8660 -8660 0 -10000 10000
|
||||
|
||||
Output 3 - Tail Servo Mixer
|
||||
----------------
|
||||
Tail Servo = Yaw (control index = 2)
|
||||
|
||||
M: 1
|
||||
O: 10000 10000 0 -10000 10000
|
||||
S: 0 2 10000 10000 0 -10000 10000
|
||||
|
||||
|
||||
Output 4 - Motor speed mixer
|
||||
-----------------
|
||||
This would be the motor speed control output from governor power demand- not sure what index to use here?
|
||||
|
||||
M: 1
|
||||
O: 10000 10000 0 -10000 10000
|
||||
S: 0 4 0 20000 -10000 -10000 10000
|
|
@ -0,0 +1,59 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
"""Convert binary log generated by sdlog to CSV format
|
||||
|
||||
Usage: python logconv.py <log.bin>"""
|
||||
|
||||
__author__ = "Anton Babushkin"
|
||||
__version__ = "0.1"
|
||||
|
||||
import struct, sys
|
||||
|
||||
def _unpack_packet(data):
|
||||
s = ""
|
||||
s += "Q" #.timestamp = buf.raw.timestamp,
|
||||
s += "fff" #.gyro = {buf.raw.gyro_rad_s[0], buf.raw.gyro_rad_s[1], buf.raw.gyro_rad_s[2]},
|
||||
s += "fff" #.accel = {buf.raw.accelerometer_m_s2[0], buf.raw.accelerometer_m_s2[1], buf.raw.accelerometer_m_s2[2]},
|
||||
s += "fff" #.mag = {buf.raw.magnetometer_ga[0], buf.raw.magnetometer_ga[1], buf.raw.magnetometer_ga[2]},
|
||||
s += "f" #.baro = buf.raw.baro_pres_mbar,
|
||||
s += "f" #.baro_alt = buf.raw.baro_alt_meter,
|
||||
s += "f" #.baro_temp = buf.raw.baro_temp_celcius,
|
||||
s += "ffff" #.control = {buf.act_controls.control[0], buf.act_controls.control[1], buf.act_controls.control[2], buf.act_controls.control[3]},
|
||||
s += "ffffffff" #.actuators = {buf.act_outputs.output[0], buf.act_outputs.output[1], buf.act_outputs.output[2], buf.act_outputs.output[3], buf.act_outputs.output[4], buf.act_outputs.output[5], buf.act_outputs.output[6], buf.act_outputs.output[7]},
|
||||
s += "f" #.vbat = buf.batt.voltage_v,
|
||||
s += "f" #.bat_current = buf.batt.current_a,
|
||||
s += "f" #.bat_discharged = buf.batt.discharged_mah,
|
||||
s += "ffff" #.adc = {buf.raw.adc_voltage_v[0], buf.raw.adc_voltage_v[1], buf.raw.adc_voltage_v[2], buf.raw.adc_voltage_v[3]},
|
||||
s += "fff" #.local_position = {buf.local_pos.x, buf.local_pos.y, buf.local_pos.z},
|
||||
s += "iii" #.gps_raw_position = {buf.gps_pos.lat, buf.gps_pos.lon, buf.gps_pos.alt},
|
||||
s += "fff" #.attitude = {buf.att.pitch, buf.att.roll, buf.att.yaw},
|
||||
s += "fffffffff" #.rotMatrix = {buf.att.R[0][0], buf.att.R[0][1], buf.att.R[0][2], buf.att.R[1][0], buf.att.R[1][1], buf.att.R[1][2], buf.att.R[2][0], buf.att.R[2][1], buf.att.R[2][2]},
|
||||
s += "fff" #.vicon = {buf.vicon_pos.x, buf.vicon_pos.y, buf.vicon_pos.z, buf.vicon_pos.roll, buf.vicon_pos.pitch, buf.vicon_pos.yaw},
|
||||
s += "ffff" #.control_effective = {buf.act_controls_effective.control_effective[0], buf.act_controls_effective.control_effective[1], buf.act_controls_effective.control_effective[2], buf.act_controls_effective.control_effective[3]},
|
||||
s += "ffffff" #.flow = {buf.flow.flow_raw_x, buf.flow.flow_raw_y, buf.flow.flow_comp_x_m, buf.flow.flow_comp_y_m, buf.flow.ground_distance_m, buf.flow.quality},
|
||||
s += "f" #.diff_pressure = buf.diff_pres.differential_pressure_pa,
|
||||
s += "f" #.ind_airspeed = buf.airspeed.indicated_airspeed_m_s,
|
||||
s += "f" #.true_airspeed = buf.airspeed.true_airspeed_m_s
|
||||
s += "iii" # to align to 280
|
||||
d = struct.unpack(s, data)
|
||||
return d
|
||||
|
||||
def _main():
|
||||
if len(sys.argv) < 2:
|
||||
print "Usage:\npython logconv.py <log.bin>"
|
||||
return
|
||||
fn = sys.argv[1]
|
||||
sysvector_size = 280
|
||||
f = open(fn, "r")
|
||||
while True:
|
||||
data = f.read(sysvector_size)
|
||||
if len(data) < sysvector_size:
|
||||
break
|
||||
a = []
|
||||
for i in _unpack_packet(data):
|
||||
a.append(str(i))
|
||||
print ";".join(a)
|
||||
f.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
_main()
|
|
@ -0,0 +1,10 @@
|
|||
*.a
|
||||
*.bdat
|
||||
*.pdat
|
||||
.built
|
||||
.config
|
||||
.depend
|
||||
.updated
|
||||
builtin/builtin_list.h
|
||||
builtin/builtin_proto.h
|
||||
Make.dep
|
|
@ -81,15 +81,19 @@ MODULES += modules/multirotor_pos_control
|
|||
MODULES += modules/sdlog
|
||||
|
||||
#
|
||||
# Libraries
|
||||
# Library modules
|
||||
#
|
||||
MODULES += modules/systemlib
|
||||
MODULES += modules/systemlib/mixer
|
||||
MODULES += modules/mathlib
|
||||
MODULES += modules/mathlib/CMSIS
|
||||
MODULES += modules/controllib
|
||||
MODULES += modules/uORB
|
||||
|
||||
#
|
||||
# Libraries
|
||||
#
|
||||
LIBRARIES += modules/mathlib/CMSIS
|
||||
|
||||
#
|
||||
# Demo apps
|
||||
#
|
||||
|
|
|
@ -180,18 +180,6 @@ EXTRA_CLEANS =
|
|||
# Modules
|
||||
################################################################################
|
||||
|
||||
#
|
||||
# We don't actually know what a module is called; all we have is a path fragment
|
||||
# that we can search for, and where we expect to find a module.mk file.
|
||||
#
|
||||
# As such, we replicate the successfully-found path inside WORK_DIR for the
|
||||
# module's build products in order to keep modules separated from each other.
|
||||
#
|
||||
# XXX If this becomes unwieldy or breaks for other reasons, we will need to
|
||||
# move to allocating directory names and keeping tabs on makefiles via
|
||||
# the directory name. That will involve arithmetic (it'd probably be time
|
||||
# for GMSL).
|
||||
|
||||
# where to look for modules
|
||||
MODULE_SEARCH_DIRS += $(WORK_DIR) $(MODULE_SRC) $(PX4_MODULE_SRC)
|
||||
|
||||
|
@ -248,6 +236,66 @@ $(MODULE_CLEANS):
|
|||
MODULE_MK=$(mkfile) \
|
||||
clean
|
||||
|
||||
################################################################################
|
||||
# Libraries
|
||||
################################################################################
|
||||
|
||||
# where to look for libraries
|
||||
LIBRARY_SEARCH_DIRS += $(WORK_DIR) $(MODULE_SRC) $(PX4_MODULE_SRC)
|
||||
|
||||
# sort and unique the library list
|
||||
LIBRARIES := $(sort $(LIBRARIES))
|
||||
|
||||
# locate the first instance of a library by full path or by looking on the
|
||||
# library search path
|
||||
define LIBRARY_SEARCH
|
||||
$(firstword $(abspath $(wildcard $(1)/library.mk)) \
|
||||
$(abspath $(foreach search_dir,$(LIBRARY_SEARCH_DIRS),$(wildcard $(search_dir)/$(1)/library.mk))) \
|
||||
MISSING_$1)
|
||||
endef
|
||||
|
||||
# make a list of library makefiles and check that we found them all
|
||||
LIBRARY_MKFILES := $(foreach library,$(LIBRARIES),$(call LIBRARY_SEARCH,$(library)))
|
||||
MISSING_LIBRARIES := $(subst MISSING_,,$(filter MISSING_%,$(LIBRARY_MKFILES)))
|
||||
ifneq ($(MISSING_LIBRARIES),)
|
||||
$(error Can't find library(s): $(MISSING_LIBRARIES))
|
||||
endif
|
||||
|
||||
# Make a list of the archive files we expect to build from libraries
|
||||
# Note that this path will typically contain a double-slash at the WORK_DIR boundary; this must be
|
||||
# preserved as it is used below to get the absolute path for the library.mk file correct.
|
||||
#
|
||||
LIBRARY_LIBS := $(foreach path,$(dir $(LIBRARY_MKFILES)),$(WORK_DIR)$(path)library.a)
|
||||
|
||||
# rules to build module objects
|
||||
.PHONY: $(LIBRARY_LIBS)
|
||||
$(LIBRARY_LIBS): relpath = $(patsubst $(WORK_DIR)%,%,$@)
|
||||
$(LIBRARY_LIBS): mkfile = $(patsubst %library.a,%library.mk,$(relpath))
|
||||
$(LIBRARY_LIBS): workdir = $(@D)
|
||||
$(LIBRARY_LIBS): $(GLOBAL_DEPS) $(NUTTX_CONFIG_HEADER)
|
||||
$(Q) $(MKDIR) -p $(workdir)
|
||||
$(Q) $(MAKE) -r -f $(PX4_MK_DIR)library.mk \
|
||||
-C $(workdir) \
|
||||
LIBRARY_WORK_DIR=$(workdir) \
|
||||
LIBRARY_LIB=$@ \
|
||||
LIBRARY_MK=$(mkfile) \
|
||||
LIBRARY_NAME=$(lastword $(subst /, ,$(workdir))) \
|
||||
library
|
||||
|
||||
# make a list of phony clean targets for modules
|
||||
LIBRARY_CLEANS := $(foreach path,$(dir $(LIBRARY_MKFILES)),$(WORK_DIR)$(path)/clean)
|
||||
|
||||
# rules to clean modules
|
||||
.PHONY: $(LIBRARY_CLEANS)
|
||||
$(LIBRARY_CLEANS): relpath = $(patsubst $(WORK_DIR)%,%,$@)
|
||||
$(LIBRARY_CLEANS): mkfile = $(patsubst %clean,%library.mk,$(relpath))
|
||||
$(LIBRARY_CLEANS):
|
||||
@$(ECHO) %% cleaning using $(mkfile)
|
||||
$(Q) $(MAKE) -r -f $(PX4_MK_DIR)library.mk \
|
||||
LIBRARY_WORK_DIR=$(dir $@) \
|
||||
LIBRARY_MK=$(mkfile) \
|
||||
clean
|
||||
|
||||
################################################################################
|
||||
# NuttX libraries and paths
|
||||
################################################################################
|
||||
|
@ -420,8 +468,8 @@ $(PRODUCT_BUNDLE): $(PRODUCT_BIN)
|
|||
$(PRODUCT_BIN): $(PRODUCT_ELF)
|
||||
$(call SYM_TO_BIN,$<,$@)
|
||||
|
||||
$(PRODUCT_ELF): $(OBJS) $(MODULE_OBJS) $(GLOBAL_DEPS) $(LINK_DEPS) $(MODULE_MKFILES)
|
||||
$(call LINK,$@,$(OBJS) $(MODULE_OBJS))
|
||||
$(PRODUCT_ELF): $(OBJS) $(MODULE_OBJS) $(LIBRARY_LIBS) $(GLOBAL_DEPS) $(LINK_DEPS) $(MODULE_MKFILES)
|
||||
$(call LINK,$@,$(OBJS) $(MODULE_OBJS) $(LIBRARY_LIBS))
|
||||
|
||||
#
|
||||
# Utility rules
|
||||
|
|
|
@ -0,0 +1,169 @@
|
|||
#
|
||||
# Copyright (c) 2013 PX4 Development Team. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# 3. Neither the name PX4 nor the names of its contributors may be
|
||||
# used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
|
||||
#
|
||||
# Framework makefile for PX4 libraries
|
||||
#
|
||||
# This makefile is invoked by firmware.mk to build each of the linraries
|
||||
# that will subsequently be linked into the firmware image.
|
||||
#
|
||||
# Applications are built as standard ar archives. Unlike modules,
|
||||
# all public symbols in library objects are visible across the entire
|
||||
# firmware stack.
|
||||
#
|
||||
# In general, modules should be preferred to libraries when possible.
|
||||
# Libraries may also be pre-built.
|
||||
#
|
||||
# IMPORTANT NOTE:
|
||||
#
|
||||
# This makefile assumes it is being invoked in the library's output directory.
|
||||
#
|
||||
|
||||
#
|
||||
# Variables that can be set by the library's library.mk:
|
||||
#
|
||||
#
|
||||
# SRCS (optional)
|
||||
#
|
||||
# Lists the .c, cpp and .S files that should be compiled/assembled to
|
||||
# produce the library.
|
||||
#
|
||||
# PREBUILT_LIB (optional)
|
||||
#
|
||||
# Names the prebuilt library in the source directory that should be
|
||||
# linked into the firmware.
|
||||
#
|
||||
# INCLUDE_DIRS (optional, must be appended, ignored if SRCS not set)
|
||||
#
|
||||
# The list of directories searched for include files. If non-standard
|
||||
# includes (e.g. those from another module) are required, paths to search
|
||||
# can be added here.
|
||||
#
|
||||
#
|
||||
|
||||
#
|
||||
# Variables visible to the library's library.mk:
|
||||
#
|
||||
# CONFIG
|
||||
# BOARD
|
||||
# LIBRARY_WORK_DIR
|
||||
# LIBRARY_LIB
|
||||
# LIBRARY_MK
|
||||
# Anything set in setup.mk, board_$(BOARD).mk and the toolchain file.
|
||||
# Anything exported from config_$(CONFIG).mk
|
||||
#
|
||||
|
||||
################################################################################
|
||||
# No user-serviceable parts below.
|
||||
################################################################################
|
||||
|
||||
ifeq ($(LIBRARY_MK),)
|
||||
$(error No library makefile specified)
|
||||
endif
|
||||
$(info %% LIBRARY_MK = $(LIBRARY_MK))
|
||||
|
||||
#
|
||||
# Get the board/toolchain config
|
||||
#
|
||||
include $(PX4_MK_DIR)/board_$(BOARD).mk
|
||||
|
||||
#
|
||||
# Get the library's config
|
||||
#
|
||||
include $(LIBRARY_MK)
|
||||
LIBRARY_SRC := $(dir $(LIBRARY_MK))
|
||||
$(info % LIBRARY_NAME = $(LIBRARY_NAME))
|
||||
$(info % LIBRARY_SRC = $(LIBRARY_SRC))
|
||||
$(info % LIBRARY_LIB = $(LIBRARY_LIB))
|
||||
$(info % LIBRARY_WORK_DIR = $(LIBRARY_WORK_DIR))
|
||||
|
||||
#
|
||||
# Things that, if they change, might affect everything
|
||||
#
|
||||
GLOBAL_DEPS += $(MAKEFILE_LIST)
|
||||
|
||||
################################################################################
|
||||
# Build rules
|
||||
################################################################################
|
||||
|
||||
#
|
||||
# What we're going to build
|
||||
#
|
||||
library: $(LIBRARY_LIB)
|
||||
|
||||
ifneq ($(PREBUILT_LIB),)
|
||||
|
||||
VPATH = $(LIBRARY_SRC)
|
||||
$(LIBRARY_LIB): $(PREBUILT_LIB) $(GLOBAL_DEPS)
|
||||
@$(ECHO) "PREBUILT: $(PREBUILT_LIB)"
|
||||
$(Q) $(COPY) $< $@
|
||||
|
||||
else
|
||||
|
||||
##
|
||||
## Object files we will generate from sources
|
||||
##
|
||||
|
||||
OBJS = $(addsuffix .o,$(SRCS))
|
||||
|
||||
#
|
||||
# SRCS -> OBJS rules
|
||||
#
|
||||
|
||||
$(OBJS): $(GLOBAL_DEPS)
|
||||
|
||||
vpath %.c $(LIBRARY_SRC)
|
||||
$(filter %.c.o,$(OBJS)): %.c.o: %.c $(GLOBAL_DEPS)
|
||||
$(call COMPILE,$<,$@)
|
||||
|
||||
vpath %.cpp $(LIBRARY_SRC)
|
||||
$(filter %.cpp.o,$(OBJS)): %.cpp.o: %.cpp $(GLOBAL_DEPS)
|
||||
$(call COMPILEXX,$<,$@)
|
||||
|
||||
vpath %.S $(LIBRARY_SRC)
|
||||
$(filter %.S.o,$(OBJS)): %.S.o: %.S $(GLOBAL_DEPS)
|
||||
$(call ASSEMBLE,$<,$@)
|
||||
|
||||
#
|
||||
# Built product rules
|
||||
#
|
||||
|
||||
$(LIBRARY_LIB): $(OBJS) $(GLOBAL_DEPS)
|
||||
$(call ARCHIVE,$@,$(OBJS))
|
||||
|
||||
endif
|
||||
|
||||
#
|
||||
# Utility rules
|
||||
#
|
||||
|
||||
clean:
|
||||
$(Q) $(REMOVE) $(LIBRARY_LIB) $(OBJS)
|
|
@ -35,7 +35,7 @@
|
|||
# This makefile is invoked by firmware.mk to build each of the modules
|
||||
# that will subsequently be linked into the firmware image.
|
||||
#
|
||||
# Applications are built as prelinked objects with a limited set of exported
|
||||
# Modules are built as prelinked objects with a limited set of exported
|
||||
# symbols, as the global namespace is shared between all modules. Normally an
|
||||
# module will just export one or more <command>_main functions.
|
||||
#
|
||||
|
@ -183,30 +183,15 @@ CXXFLAGS += -fvisibility=$(DEFAULT_VISIBILITY) -include $(PX4_INCLUDE_DIR)visibi
|
|||
#
|
||||
module: $(MODULE_OBJ) $(MODULE_COMMAND_FILES)
|
||||
|
||||
##
|
||||
## Locate sources (allows relative source paths in module.mk)
|
||||
##
|
||||
#define SRC_SEARCH
|
||||
# $(abspath $(firstword $(wildcard $1) $(wildcard $(MODULE_SRC)/$1) MISSING_$1))
|
||||
#endef
|
||||
#
|
||||
#ABS_SRCS ?= $(foreach src,$(SRCS),$(call SRC_SEARCH,$(src)))
|
||||
#MISSING_SRCS := $(subst MISSING_,,$(filter MISSING_%,$(ABS_SRCS)))
|
||||
#ifneq ($(MISSING_SRCS),)
|
||||
#$(error $(MODULE_MK): missing in SRCS: $(MISSING_SRCS))
|
||||
#endif
|
||||
#ifeq ($(ABS_SRCS),)
|
||||
#$(error $(MODULE_MK): nothing to compile in SRCS)
|
||||
#endif
|
||||
# Object files we will generate from sources
|
||||
#
|
||||
##
|
||||
## Object files we will generate from sources
|
||||
##
|
||||
#OBJS := $(foreach src,$(ABS_SRCS),$(MODULE_WORK_DIR)$(src).o)
|
||||
OBJS = $(addsuffix .o,$(SRCS))
|
||||
|
||||
OBJS = $(addsuffix .o,$(SRCS))
|
||||
$(info SRCS $(SRCS))
|
||||
$(info OBJS $(OBJS))
|
||||
#
|
||||
# Dependency files that will be auto-generated
|
||||
#
|
||||
DEPS = $(addsuffix .d,$(SRCS))
|
||||
|
||||
#
|
||||
# SRCS -> OBJS rules
|
||||
|
@ -239,3 +224,5 @@ $(MODULE_OBJ): $(OBJS) $(GLOBAL_DEPS)
|
|||
|
||||
clean:
|
||||
$(Q) $(REMOVE) $(MODULE_PRELINK) $(OBJS)
|
||||
|
||||
-include $(DEPS)
|
||||
|
|
|
@ -235,7 +235,7 @@ endef
|
|||
define LINK
|
||||
@$(ECHO) "LINK: $1"
|
||||
@$(MKDIR) -p $(dir $1)
|
||||
$(Q) $(LD) $(LDFLAGS) -o $1 --start-group $2 $(LIBS) $(EXTRA_LIBS) $(LIBGCC) --end-group
|
||||
$(Q) $(LD) $(LDFLAGS) -Map $1.map -o $1 --start-group $2 $(LIBS) $(EXTRA_LIBS) $(LIBGCC) --end-group
|
||||
endef
|
||||
|
||||
# Convert $1 from a linked object to a raw binary in $2
|
||||
|
@ -280,6 +280,7 @@ define BIN_TO_OBJ
|
|||
$(Q) $(OBJCOPY) $2 \
|
||||
--redefine-sym $(call BIN_SYM_PREFIX,$1)_start=$3 \
|
||||
--redefine-sym $(call BIN_SYM_PREFIX,$1)_size=$3_len \
|
||||
--strip-symbol $(call BIN_SYM_PREFIX,$1)_end
|
||||
--strip-symbol $(call BIN_SYM_PREFIX,$1)_end \
|
||||
--rename-section .data=.rodata
|
||||
$(Q) $(REMOVE) $2.c $2.c.o
|
||||
endef
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
*.a
|
||||
.config
|
||||
.config-e
|
||||
.configX-e
|
||||
.depend
|
||||
.version
|
||||
arch/arm/include/board
|
||||
arch/arm/include/chip
|
||||
arch/arm/src/board
|
||||
arch/arm/src/chip
|
||||
include/apps
|
||||
include/arch
|
||||
include/math.h
|
||||
include/nuttx/config.h
|
||||
include/nuttx/version.h
|
||||
Make.defs
|
||||
Make.dep
|
||||
mkdeps
|
||||
nuttx
|
||||
nuttx-export.zip
|
||||
nuttx.bin
|
||||
nuttx.hex
|
||||
setenv.sh
|
||||
System.map
|
||||
tools/mkconfig
|
||||
tools/mkconfig.exe
|
||||
tools/mkversion
|
||||
tools/mkversion.exe
|
|
@ -1273,6 +1273,9 @@ PX4IO::print_status()
|
|||
((alarms & PX4IO_P_STATUS_ALARMS_FMU_LOST) ? " FMU_LOST" : ""),
|
||||
((alarms & PX4IO_P_STATUS_ALARMS_RC_LOST) ? " RC_LOST" : ""),
|
||||
((alarms & PX4IO_P_STATUS_ALARMS_PWM_ERROR) ? " PWM_ERROR" : ""));
|
||||
/* now clear alarms */
|
||||
io_reg_set(PX4IO_PAGE_STATUS, PX4IO_P_STATUS_ALARMS, 0xFFFF);
|
||||
|
||||
printf("vbatt %u ibatt %u vbatt scale %u\n",
|
||||
io_reg_get(PX4IO_PAGE_STATUS, PX4IO_P_STATUS_VBATT),
|
||||
io_reg_get(PX4IO_PAGE_STATUS, PX4IO_P_STATUS_IBATT),
|
||||
|
|
|
@ -330,7 +330,7 @@ static void hrt_call_invoke(void);
|
|||
/*
|
||||
* PPM decoder tuning parameters
|
||||
*/
|
||||
# define PPM_MAX_PULSE_WIDTH 500 /* maximum width of a pulse */
|
||||
# define PPM_MAX_PULSE_WIDTH 550 /* maximum width of a valid pulse */
|
||||
# define PPM_MIN_CHANNEL_VALUE 800 /* shortest valid channel signal */
|
||||
# define PPM_MAX_CHANNEL_VALUE 2200 /* longest valid channel signal */
|
||||
# define PPM_MIN_START 2500 /* shortest valid start gap */
|
||||
|
|
|
@ -154,7 +154,14 @@ void control_heading(const struct vehicle_global_position_s *pos, const struct v
|
|||
/* calculate heading error */
|
||||
float yaw_err = att->yaw - bearing;
|
||||
/* apply control gain */
|
||||
att_sp->roll_body = yaw_err * p.hdng_p;
|
||||
float roll_command = yaw_err * p.hdng_p;
|
||||
|
||||
/* limit output, this commonly is a tuning parameter, too */
|
||||
if (att_sp->roll_body < -0.5f) {
|
||||
att_sp->roll_body = -0.5f;
|
||||
} else if (att_sp->roll_body > 0.5f) {
|
||||
att_sp->roll_body = 0.5f;
|
||||
}
|
||||
}
|
||||
|
||||
/* Main Thread */
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (C) 2012 PX4 Development Team. All rights reserved.
|
||||
* Copyright (c) 2012, 2013 PX4 Development Team. All rights reserved.
|
||||
* Author: Lorenz Meier <lm@inf.ethz.ch>
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -47,27 +47,42 @@
|
|||
*/
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
/*
|
||||
/**
|
||||
* The mavlink log device node; must be opened before messages
|
||||
* can be logged.
|
||||
*/
|
||||
#define MAVLINK_LOG_DEVICE "/dev/mavlink"
|
||||
/**
|
||||
* The maximum string length supported.
|
||||
*/
|
||||
#define MAVLINK_LOG_MAXLEN 50
|
||||
|
||||
#define MAVLINK_IOC_SEND_TEXT_INFO _IOC(0x1100, 1)
|
||||
#define MAVLINK_IOC_SEND_TEXT_CRITICAL _IOC(0x1100, 2)
|
||||
#define MAVLINK_IOC_SEND_TEXT_EMERGENCY _IOC(0x1100, 3)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
__EXPORT void mavlink_vasprintf(int _fd, int severity, const char *fmt, ...);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The va_args implementation here is not beautiful, but obviously we run into the same issues
|
||||
* the GCC devs saw, and are using their solution:
|
||||
*
|
||||
* http://gcc.gnu.org/onlinedocs/cpp/Variadic-Macros.html
|
||||
*/
|
||||
|
||||
/**
|
||||
* Send a mavlink emergency message.
|
||||
*
|
||||
* @param _fd A file descriptor returned from open(MAVLINK_LOG_DEVICE, 0);
|
||||
* @param _text The text to log;
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
#define mavlink_log_emergency(_fd, _text) ::ioctl(_fd, MAVLINK_IOC_SEND_TEXT_EMERGENCY, (unsigned long)_text);
|
||||
#else
|
||||
#define mavlink_log_emergency(_fd, _text) ioctl(_fd, MAVLINK_IOC_SEND_TEXT_EMERGENCY, (unsigned long)_text);
|
||||
#endif
|
||||
#define mavlink_log_emergency(_fd, _text, ...) mavlink_vasprintf(_fd, MAVLINK_IOC_SEND_TEXT_EMERGENCY, _text, ##__VA_ARGS__);
|
||||
|
||||
/**
|
||||
* Send a mavlink critical message.
|
||||
|
@ -75,11 +90,7 @@
|
|||
* @param _fd A file descriptor returned from open(MAVLINK_LOG_DEVICE, 0);
|
||||
* @param _text The text to log;
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
#define mavlink_log_critical(_fd, _text) ::ioctl(_fd, MAVLINK_IOC_SEND_TEXT_CRITICAL, (unsigned long)_text);
|
||||
#else
|
||||
#define mavlink_log_critical(_fd, _text) ioctl(_fd, MAVLINK_IOC_SEND_TEXT_CRITICAL, (unsigned long)_text);
|
||||
#endif
|
||||
#define mavlink_log_critical(_fd, _text, ...) mavlink_vasprintf(_fd, MAVLINK_IOC_SEND_TEXT_CRITICAL, _text, ##__VA_ARGS__);
|
||||
|
||||
/**
|
||||
* Send a mavlink info message.
|
||||
|
@ -87,14 +98,10 @@
|
|||
* @param _fd A file descriptor returned from open(MAVLINK_LOG_DEVICE, 0);
|
||||
* @param _text The text to log;
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
#define mavlink_log_info(_fd, _text) ::ioctl(_fd, MAVLINK_IOC_SEND_TEXT_INFO, (unsigned long)_text);
|
||||
#else
|
||||
#define mavlink_log_info(_fd, _text) ioctl(_fd, MAVLINK_IOC_SEND_TEXT_INFO, (unsigned long)_text);
|
||||
#endif
|
||||
#define mavlink_log_info(_fd, _text, ...) mavlink_vasprintf(_fd, MAVLINK_IOC_SEND_TEXT_INFO, _text, ##__VA_ARGS__);
|
||||
|
||||
struct mavlink_logmessage {
|
||||
char text[51];
|
||||
char text[MAVLINK_LOG_MAXLEN + 1];
|
||||
unsigned char severity;
|
||||
};
|
||||
|
||||
|
@ -116,5 +123,7 @@ void mavlink_logbuffer_write(struct mavlink_logbuffer *lb, const struct mavlink_
|
|||
|
||||
int mavlink_logbuffer_read(struct mavlink_logbuffer *lb, struct mavlink_logmessage *elem);
|
||||
|
||||
void mavlink_logbuffer_vasprintf(struct mavlink_logbuffer *lb, int severity, const char *fmt, ...);
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -37,4 +37,5 @@
|
|||
|
||||
MODULE_COMMAND = fixedwing_backside
|
||||
|
||||
SRCS = fixedwing_backside_main.cpp
|
||||
SRCS = fixedwing_backside_main.cpp \
|
||||
params.c
|
||||
|
|
|
@ -1,159 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_abs_f32.c
|
||||
*
|
||||
* Description: Vector absolute value.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
#include <math.h>
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup BasicAbs Vector Absolute Value
|
||||
*
|
||||
* Computes the absolute value of a vector on an element-by-element basis.
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = abs(pSrcA[n]), 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* The operation can be done in-place by setting the input and output pointers to the same buffer.
|
||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicAbs
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Floating-point vector absolute value.
|
||||
* @param[in] *pSrc points to the input buffer
|
||||
* @param[out] *pDst points to the output buffer
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_abs_f32(
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t in1, in2, in3, in4; /* temporary variables */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Calculate absolute and then store the results in the destination buffer. */
|
||||
/* read sample from source */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
in3 = *(pSrc + 2);
|
||||
|
||||
/* find absolute value */
|
||||
in1 = fabsf(in1);
|
||||
|
||||
/* read sample from source */
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
/* find absolute value */
|
||||
in2 = fabsf(in2);
|
||||
|
||||
/* read sample from source */
|
||||
*pDst = in1;
|
||||
|
||||
/* find absolute value */
|
||||
in3 = fabsf(in3);
|
||||
|
||||
/* find absolute value */
|
||||
in4 = fabsf(in4);
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 1) = in2;
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 2) = in3;
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 3) = in4;
|
||||
|
||||
|
||||
/* Update source pointer to process next sampels */
|
||||
pSrc += 4u;
|
||||
|
||||
/* Update destination pointer to process next sampels */
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Calculate absolute and then store the results in the destination buffer. */
|
||||
*pDst++ = fabsf(*pSrc++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicAbs group
|
||||
*/
|
|
@ -1,173 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_abs_q15.c
|
||||
*
|
||||
* Description: Q15 vector absolute value.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicAbs
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q15 vector absolute value.
|
||||
* @param[in] *pSrc points to the input buffer
|
||||
* @param[out] *pDst points to the output buffer
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
|
||||
*/
|
||||
|
||||
void arm_abs_q15(
|
||||
q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q15_t in1; /* Input value1 */
|
||||
q15_t in2; /* Input value2 */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Read two inputs */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
|
||||
|
||||
/* Store the Absolute result in the destination buffer by packing the two values, in a single cycle */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*__SIMD32(pDst)++ =
|
||||
__PKHBT(((in1 > 0) ? in1 : __QSUB16(0, in1)),
|
||||
((in2 > 0) ? in2 : __QSUB16(0, in2)), 16);
|
||||
|
||||
#else
|
||||
|
||||
|
||||
*__SIMD32(pDst)++ =
|
||||
__PKHBT(((in2 > 0) ? in2 : __QSUB16(0, in2)),
|
||||
((in1 > 0) ? in1 : __QSUB16(0, in1)), 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*__SIMD32(pDst)++ =
|
||||
__PKHBT(((in1 > 0) ? in1 : __QSUB16(0, in1)),
|
||||
((in2 > 0) ? in2 : __QSUB16(0, in2)), 16);
|
||||
|
||||
#else
|
||||
|
||||
|
||||
*__SIMD32(pDst)++ =
|
||||
__PKHBT(((in2 > 0) ? in2 : __QSUB16(0, in2)),
|
||||
((in1 > 0) ? in1 : __QSUB16(0, in1)), 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Read the input */
|
||||
in1 = *pSrc++;
|
||||
|
||||
/* Calculate absolute value of input and then store the result in the destination buffer. */
|
||||
*pDst++ = (in1 > 0) ? in1 : __QSUB16(0, in1);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
q15_t in; /* Temporary input variable */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Read the input */
|
||||
in = *pSrc++;
|
||||
|
||||
/* Calculate absolute value of input and then store the result in the destination buffer. */
|
||||
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicAbs group
|
||||
*/
|
|
@ -1,125 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_abs_q31.c
|
||||
*
|
||||
* Description: Q31 vector absolute value.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicAbs
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Q31 vector absolute value.
|
||||
* @param[in] *pSrc points to the input buffer
|
||||
* @param[out] *pDst points to the output buffer
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
|
||||
*/
|
||||
|
||||
void arm_abs_q31(
|
||||
q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q31_t in; /* Input value */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t in1, in2, in3, in4;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Calculate absolute of input (if -1 then saturated to 0x7fffffff) and then store the results in the destination buffer. */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
in3 = *pSrc++;
|
||||
in4 = *pSrc++;
|
||||
|
||||
*pDst++ = (in1 > 0) ? in1 : __QSUB(0, in1);
|
||||
*pDst++ = (in2 > 0) ? in2 : __QSUB(0, in2);
|
||||
*pDst++ = (in3 > 0) ? in3 : __QSUB(0, in3);
|
||||
*pDst++ = (in4 > 0) ? in4 : __QSUB(0, in4);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Calculate absolute value of the input (if -1 then saturated to 0x7fffffff) and then store the results in the destination buffer. */
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in > 0) ? in : ((in == 0x80000000) ? 0x7fffffff : -in);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicAbs group
|
||||
*/
|
|
@ -1,152 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_abs_q7.c
|
||||
*
|
||||
* Description: Q7 vector absolute value.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicAbs
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q7 vector absolute value.
|
||||
* @param[in] *pSrc points to the input buffer
|
||||
* @param[out] *pDst points to the output buffer
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* \par Conditions for optimum performance
|
||||
* Input and output buffers should be aligned by 32-bit
|
||||
*
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.
|
||||
*/
|
||||
|
||||
void arm_abs_q7(
|
||||
q7_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q7_t in; /* Input value1 */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t in1, in2, in3, in4; /* temporary input variables */
|
||||
q31_t out1, out2, out3, out4; /* temporary output variables */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Read inputs */
|
||||
in1 = (q31_t) * pSrc;
|
||||
in2 = (q31_t) * (pSrc + 1);
|
||||
in3 = (q31_t) * (pSrc + 2);
|
||||
|
||||
/* find absolute value */
|
||||
out1 = (in1 > 0) ? in1 : __QSUB8(0, in1);
|
||||
|
||||
/* read input */
|
||||
in4 = (q31_t) * (pSrc + 3);
|
||||
|
||||
/* find absolute value */
|
||||
out2 = (in2 > 0) ? in2 : __QSUB8(0, in2);
|
||||
|
||||
/* store result to destination */
|
||||
*pDst = (q7_t) out1;
|
||||
|
||||
/* find absolute value */
|
||||
out3 = (in3 > 0) ? in3 : __QSUB8(0, in3);
|
||||
|
||||
/* find absolute value */
|
||||
out4 = (in4 > 0) ? in4 : __QSUB8(0, in4);
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 1) = (q7_t) out2;
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 2) = (q7_t) out3;
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 3) = (q7_t) out4;
|
||||
|
||||
/* update pointers to process next samples */
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif // #define ARM_MATH_CM0
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = |A| */
|
||||
/* Read the input */
|
||||
in = *pSrc++;
|
||||
|
||||
/* Store the Absolute result in the destination buffer */
|
||||
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? 0x7f : -in);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicAbs group
|
||||
*/
|
|
@ -1,145 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_add_f32.c
|
||||
*
|
||||
* Description: Floating-point vector addition.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup BasicAdd Vector Addition
|
||||
*
|
||||
* Element-by-element addition of two vectors.
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = pSrcA[n] + pSrcB[n], 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicAdd
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Floating-point vector addition.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_add_f32(
|
||||
float32_t * pSrcA,
|
||||
float32_t * pSrcB,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t inA1, inA2, inA3, inA4; /* temporary input variabels */
|
||||
float32_t inB1, inB2, inB3, inB4; /* temporary input variables */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
|
||||
/* read four inputs from sourceA and four inputs from sourceB */
|
||||
inA1 = *pSrcA;
|
||||
inB1 = *pSrcB;
|
||||
inA2 = *(pSrcA + 1);
|
||||
inB2 = *(pSrcB + 1);
|
||||
inA3 = *(pSrcA + 2);
|
||||
inB3 = *(pSrcB + 2);
|
||||
inA4 = *(pSrcA + 3);
|
||||
inB4 = *(pSrcB + 3);
|
||||
|
||||
/* C = A + B */
|
||||
/* add and store result to destination */
|
||||
*pDst = inA1 + inB1;
|
||||
*(pDst + 1) = inA2 + inB2;
|
||||
*(pDst + 2) = inA3 + inB3;
|
||||
*(pDst + 3) = inA4 + inB4;
|
||||
|
||||
/* update pointers to process next samples */
|
||||
pSrcA += 4u;
|
||||
pSrcB += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
*pDst++ = (*pSrcA++) + (*pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicAdd group
|
||||
*/
|
|
@ -1,135 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_add_q15.c
|
||||
*
|
||||
* Description: Q15 vector addition
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicAdd
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q15 vector addition.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_add_q15(
|
||||
q15_t * pSrcA,
|
||||
q15_t * pSrcB,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t inA1, inA2, inB1, inB2;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
inA1 = *__SIMD32(pSrcA)++;
|
||||
inA2 = *__SIMD32(pSrcA)++;
|
||||
inB1 = *__SIMD32(pSrcB)++;
|
||||
inB2 = *__SIMD32(pSrcB)++;
|
||||
|
||||
*__SIMD32(pDst)++ = __QADD16(inA1, inB1);
|
||||
*__SIMD32(pDst)++ = __QADD16(inA2, inB2);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
*pDst++ = (q15_t) __QADD16(*pSrcA++, *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) * pSrcA++ + *pSrcB++), 16);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicAdd group
|
||||
*/
|
|
@ -1,143 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_add_q31.c
|
||||
*
|
||||
* Description: Q31 vector addition.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicAdd
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Q31 vector addition.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_add_q31(
|
||||
q31_t * pSrcA,
|
||||
q31_t * pSrcB,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t inA1, inA2, inA3, inA4;
|
||||
q31_t inB1, inB2, inB3, inB4;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
inA1 = *pSrcA++;
|
||||
inA2 = *pSrcA++;
|
||||
inB1 = *pSrcB++;
|
||||
inB2 = *pSrcB++;
|
||||
|
||||
inA3 = *pSrcA++;
|
||||
inA4 = *pSrcA++;
|
||||
inB3 = *pSrcB++;
|
||||
inB4 = *pSrcB++;
|
||||
|
||||
*pDst++ = __QADD(inA1, inB1);
|
||||
*pDst++ = __QADD(inA2, inB2);
|
||||
*pDst++ = __QADD(inA3, inB3);
|
||||
*pDst++ = __QADD(inA4, inB4);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrcA++ + *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicAdd group
|
||||
*/
|
|
@ -1,129 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_add_q7.c
|
||||
*
|
||||
* Description: Q7 vector addition.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicAdd
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q7 vector addition.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_add_q7(
|
||||
q7_t * pSrcA,
|
||||
q7_t * pSrcB,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
*__SIMD32(pDst)++ = __QADD8(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT(*pSrcA++ + *pSrcB++, 8);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + B */
|
||||
/* Add and then store the results in the destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT((q15_t) * pSrcA++ + *pSrcB++, 8);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicAdd group
|
||||
*/
|
|
@ -1,125 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dot_prod_f32.c
|
||||
*
|
||||
* Description: Floating-point dot product.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup dot_prod Vector Dot Product
|
||||
*
|
||||
* Computes the dot product of two vectors.
|
||||
* The vectors are multiplied element-by-element and then summed.
|
||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup dot_prod
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Dot product of floating-point vectors.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @param[out] *result output result returned here
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
|
||||
void arm_dot_prod_f32(
|
||||
float32_t * pSrcA,
|
||||
float32_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
float32_t * result)
|
||||
{
|
||||
float32_t sum = 0.0f; /* Temporary result storage */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Calculate dot product and then store the result in a temporary buffer */
|
||||
sum += (*pSrcA++) * (*pSrcB++);
|
||||
sum += (*pSrcA++) * (*pSrcB++);
|
||||
sum += (*pSrcA++) * (*pSrcB++);
|
||||
sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Calculate dot product and then store the result in a temporary buffer. */
|
||||
sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
/* Store the result back in the destination buffer */
|
||||
*result = sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of dot_prod group
|
||||
*/
|
|
@ -1,135 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dot_prod_q15.c
|
||||
*
|
||||
* Description: Q15 dot product.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup dot_prod
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Dot product of Q15 vectors.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @param[out] *result output result returned here
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The intermediate multiplications are in 1.15 x 1.15 = 2.30 format and these
|
||||
* results are added to a 64-bit accumulator in 34.30 format.
|
||||
* Nonsaturating additions are used and given that there are 33 guard bits in the accumulator
|
||||
* there is no risk of overflow.
|
||||
* The return result is in 34.30 format.
|
||||
*/
|
||||
|
||||
void arm_dot_prod_q15(
|
||||
q15_t * pSrcA,
|
||||
q15_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
q63_t * result)
|
||||
{
|
||||
q63_t sum = 0; /* Temporary result storage */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Calculate dot product and then store the result in a temporary buffer. */
|
||||
sum = __SMLALD(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++, sum);
|
||||
sum = __SMLALD(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++, sum);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Calculate dot product and then store the results in a temporary buffer. */
|
||||
sum = __SMLALD(*pSrcA++, *pSrcB++, sum);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Calculate dot product and then store the results in a temporary buffer. */
|
||||
sum += (q63_t) ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
/* Store the result in the destination buffer in 34.30 format */
|
||||
*result = sum;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of dot_prod group
|
||||
*/
|
|
@ -1,138 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dot_prod_q31.c
|
||||
*
|
||||
* Description: Q31 dot product.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup dot_prod
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Dot product of Q31 vectors.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @param[out] *result output result returned here
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The intermediate multiplications are in 1.31 x 1.31 = 2.62 format and these
|
||||
* are truncated to 2.48 format by discarding the lower 14 bits.
|
||||
* The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.
|
||||
* There are 15 guard bits in the accumulator and there is no risk of overflow as long as
|
||||
* the length of the vectors is less than 2^16 elements.
|
||||
* The return result is in 16.48 format.
|
||||
*/
|
||||
|
||||
void arm_dot_prod_q31(
|
||||
q31_t * pSrcA,
|
||||
q31_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
q63_t * result)
|
||||
{
|
||||
q63_t sum = 0; /* Temporary result storage */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t inA1, inA2, inA3, inA4;
|
||||
q31_t inB1, inB2, inB3, inB4;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Calculate dot product and then store the result in a temporary buffer. */
|
||||
inA1 = *pSrcA++;
|
||||
inA2 = *pSrcA++;
|
||||
inA3 = *pSrcA++;
|
||||
inA4 = *pSrcA++;
|
||||
inB1 = *pSrcB++;
|
||||
inB2 = *pSrcB++;
|
||||
inB3 = *pSrcB++;
|
||||
inB4 = *pSrcB++;
|
||||
|
||||
sum += ((q63_t) inA1 * inB1) >> 14u;
|
||||
sum += ((q63_t) inA2 * inB2) >> 14u;
|
||||
sum += ((q63_t) inA3 * inB3) >> 14u;
|
||||
sum += ((q63_t) inA4 * inB4) >> 14u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Calculate dot product and then store the result in a temporary buffer. */
|
||||
sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store the result in the destination buffer in 16.48 format */
|
||||
*result = sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of dot_prod group
|
||||
*/
|
|
@ -1,154 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dot_prod_q7.c
|
||||
*
|
||||
* Description: Q7 dot product.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup dot_prod
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Dot product of Q7 vectors.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @param[out] *result output result returned here
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The intermediate multiplications are in 1.7 x 1.7 = 2.14 format and these
|
||||
* results are added to an accumulator in 18.14 format.
|
||||
* Nonsaturating additions are used and there is no danger of wrap around as long as
|
||||
* the vectors are less than 2^18 elements long.
|
||||
* The return result is in 18.14 format.
|
||||
*/
|
||||
|
||||
void arm_dot_prod_q7(
|
||||
q7_t * pSrcA,
|
||||
q7_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
q31_t * result)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
q31_t sum = 0; /* Temporary variables to store output */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q31_t input1, input2; /* Temporary variables to store input */
|
||||
q31_t inA1, inA2, inB1, inB2; /* Temporary variables to store input */
|
||||
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* read 4 samples at a time from sourceA */
|
||||
input1 = *__SIMD32(pSrcA)++;
|
||||
/* read 4 samples at a time from sourceB */
|
||||
input2 = *__SIMD32(pSrcB)++;
|
||||
|
||||
/* extract two q7_t samples to q15_t samples */
|
||||
inA1 = __SXTB16(__ROR(input1, 8));
|
||||
/* extract reminaing two samples */
|
||||
inA2 = __SXTB16(input1);
|
||||
/* extract two q7_t samples to q15_t samples */
|
||||
inB1 = __SXTB16(__ROR(input2, 8));
|
||||
/* extract reminaing two samples */
|
||||
inB2 = __SXTB16(input2);
|
||||
|
||||
/* multiply and accumulate two samples at a time */
|
||||
sum = __SMLAD(inA1, inB1, sum);
|
||||
sum = __SMLAD(inA2, inB2, sum);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Dot product and then store the results in a temporary buffer. */
|
||||
sum = __SMLAD(*pSrcA++, *pSrcB++, sum);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||
/* Dot product and then store the results in a temporary buffer. */
|
||||
sum += (q31_t) ((q15_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
/* Store the result in the destination buffer in 18.14 format */
|
||||
*result = sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of dot_prod group
|
||||
*/
|
|
@ -1,172 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mult_f32.c
|
||||
*
|
||||
* Description: Floating-point vector multiplication.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.5 2010/04/26
|
||||
* incorporated review comments and updated with latest CMSIS layer
|
||||
*
|
||||
* Version 0.0.3 2010/03/10
|
||||
* Initial version
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup BasicMult Vector Multiplication
|
||||
*
|
||||
* Element-by-element multiplication of two vectors.
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = pSrcA[n] * pSrcB[n], 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicMult
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Floating-point vector multiplication.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_mult_f32(
|
||||
float32_t * pSrcA,
|
||||
float32_t * pSrcB,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t inA1, inA2, inA3, inA4; /* temporary input variables */
|
||||
float32_t inB1, inB2, inB3, inB4; /* temporary input variables */
|
||||
float32_t out1, out2, out3, out4; /* temporary output variables */
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * B */
|
||||
/* Multiply the inputs and store the results in output buffer */
|
||||
/* read sample from sourceA */
|
||||
inA1 = *pSrcA;
|
||||
/* read sample from sourceB */
|
||||
inB1 = *pSrcB;
|
||||
/* read sample from sourceA */
|
||||
inA2 = *(pSrcA + 1);
|
||||
/* read sample from sourceB */
|
||||
inB2 = *(pSrcB + 1);
|
||||
|
||||
/* out = sourceA * sourceB */
|
||||
out1 = inA1 * inB1;
|
||||
|
||||
/* read sample from sourceA */
|
||||
inA3 = *(pSrcA + 2);
|
||||
/* read sample from sourceB */
|
||||
inB3 = *(pSrcB + 2);
|
||||
|
||||
/* out = sourceA * sourceB */
|
||||
out2 = inA2 * inB2;
|
||||
|
||||
/* read sample from sourceA */
|
||||
inA4 = *(pSrcA + 3);
|
||||
|
||||
/* store result to destination buffer */
|
||||
*pDst = out1;
|
||||
|
||||
/* read sample from sourceB */
|
||||
inB4 = *(pSrcB + 3);
|
||||
|
||||
/* out = sourceA * sourceB */
|
||||
out3 = inA3 * inB3;
|
||||
|
||||
/* store result to destination buffer */
|
||||
*(pDst + 1) = out2;
|
||||
|
||||
/* out = sourceA * sourceB */
|
||||
out4 = inA4 * inB4;
|
||||
/* store result to destination buffer */
|
||||
*(pDst + 2) = out3;
|
||||
/* store result to destination buffer */
|
||||
*(pDst + 3) = out4;
|
||||
|
||||
|
||||
/* update pointers to process next samples */
|
||||
pSrcA += 4u;
|
||||
pSrcB += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * B */
|
||||
/* Multiply the inputs and store the results in output buffer */
|
||||
*pDst++ = (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicMult group
|
||||
*/
|
|
@ -1,152 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mult_q15.c
|
||||
*
|
||||
* Description: Q15 vector multiplication.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.5 2010/04/26
|
||||
* incorporated review comments and updated with latest CMSIS layer
|
||||
*
|
||||
* Version 0.0.3 2010/03/10
|
||||
* Initial version
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicMult
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Q15 vector multiplication
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_mult_q15(
|
||||
q15_t * pSrcA,
|
||||
q15_t * pSrcB,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t inA1, inA2, inB1, inB2; /* temporary input variables */
|
||||
q15_t out1, out2, out3, out4; /* temporary output variables */
|
||||
q31_t mul1, mul2, mul3, mul4; /* temporary variables */
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* read two samples at a time from sourceA */
|
||||
inA1 = *__SIMD32(pSrcA)++;
|
||||
/* read two samples at a time from sourceB */
|
||||
inB1 = *__SIMD32(pSrcB)++;
|
||||
/* read two samples at a time from sourceA */
|
||||
inA2 = *__SIMD32(pSrcA)++;
|
||||
/* read two samples at a time from sourceB */
|
||||
inB2 = *__SIMD32(pSrcB)++;
|
||||
|
||||
/* multiply mul = sourceA * sourceB */
|
||||
mul1 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
|
||||
mul2 = (q31_t) ((q15_t) inA1 * (q15_t) inB1);
|
||||
mul3 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB2 >> 16));
|
||||
mul4 = (q31_t) ((q15_t) inA2 * (q15_t) inB2);
|
||||
|
||||
/* saturate result to 16 bit */
|
||||
out1 = (q15_t) __SSAT(mul1 >> 15, 16);
|
||||
out2 = (q15_t) __SSAT(mul2 >> 15, 16);
|
||||
out3 = (q15_t) __SSAT(mul3 >> 15, 16);
|
||||
out4 = (q15_t) __SSAT(mul4 >> 15, 16);
|
||||
|
||||
/* store the result */
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT(out2, out1, 16);
|
||||
*__SIMD32(pDst)++ = __PKHBT(out4, out3, 16);
|
||||
|
||||
#else
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT(out2, out1, 16);
|
||||
*__SIMD32(pDst)++ = __PKHBT(out4, out3, 16);
|
||||
|
||||
#endif // #ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * B */
|
||||
/* Multiply the inputs and store the result in the destination buffer */
|
||||
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicMult group
|
||||
*/
|
|
@ -1,143 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mult_q31.c
|
||||
*
|
||||
* Description: Q31 vector multiplication.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.5 2010/04/26
|
||||
* incorporated review comments and updated with latest CMSIS layer
|
||||
*
|
||||
* Version 0.0.3 2010/03/10
|
||||
* Initial version
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicMult
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q31 vector multiplication.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_mult_q31(
|
||||
q31_t * pSrcA,
|
||||
q31_t * pSrcB,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t inA1, inA2, inA3, inA4; /* temporary input variables */
|
||||
q31_t inB1, inB2, inB3, inB4; /* temporary input variables */
|
||||
q31_t out1, out2, out3, out4; /* temporary output variables */
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * B */
|
||||
/* Multiply the inputs and then store the results in the destination buffer. */
|
||||
inA1 = *pSrcA++;
|
||||
inA2 = *pSrcA++;
|
||||
inA3 = *pSrcA++;
|
||||
inA4 = *pSrcA++;
|
||||
inB1 = *pSrcB++;
|
||||
inB2 = *pSrcB++;
|
||||
inB3 = *pSrcB++;
|
||||
inB4 = *pSrcB++;
|
||||
|
||||
out1 = ((q63_t) inA1 * inB1) >> 32;
|
||||
out2 = ((q63_t) inA2 * inB2) >> 32;
|
||||
out3 = ((q63_t) inA3 * inB3) >> 32;
|
||||
out4 = ((q63_t) inA4 * inB4) >> 32;
|
||||
|
||||
out1 = __SSAT(out1, 31);
|
||||
out2 = __SSAT(out2, 31);
|
||||
out3 = __SSAT(out3, 31);
|
||||
out4 = __SSAT(out4, 31);
|
||||
|
||||
*pDst++ = out1 << 1u;
|
||||
*pDst++ = out2 << 1u;
|
||||
*pDst++ = out3 << 1u;
|
||||
*pDst++ = out4 << 1u;
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * B */
|
||||
/* Multiply the inputs and then store the results in the destination buffer. */
|
||||
*pDst++ =
|
||||
(q31_t) clip_q63_to_q31(((q63_t) (*pSrcA++) * (*pSrcB++)) >> 31);
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicMult group
|
||||
*/
|
|
@ -1,128 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mult_q7.c
|
||||
*
|
||||
* Description: Q7 vector multiplication.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
*
|
||||
* Version 0.0.5 2010/04/26
|
||||
* incorporated review comments and updated with latest CMSIS layer
|
||||
*
|
||||
* Version 0.0.3 2010/03/10 DP
|
||||
* Initial version
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicMult
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q7 vector multiplication
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_mult_q7(
|
||||
q7_t * pSrcA,
|
||||
q7_t * pSrcB,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q7_t out1, out2, out3, out4; /* Temporary variables to store the product */
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * B */
|
||||
/* Multiply the inputs and store the results in temporary variables */
|
||||
out1 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
out2 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
out3 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
out4 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
|
||||
/* Store the results of 4 inputs in the destination buffer in single cycle by packing */
|
||||
*__SIMD32(pDst)++ = __PACKq7(out1, out2, out3, out4);
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * B */
|
||||
/* Multiply the inputs and store the result in the destination buffer */
|
||||
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicMult group
|
||||
*/
|
|
@ -1,137 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_negate_f32.c
|
||||
*
|
||||
* Description: Negates floating-point vectors.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup negate Vector Negate
|
||||
*
|
||||
* Negates the elements of a vector.
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = -pSrc[n], 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup negate
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Negates the elements of a floating-point vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_negate_f32(
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t in1, in2, in3, in4; /* temporary variables */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* read inputs from source */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
in3 = *(pSrc + 2);
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
/* negate the input */
|
||||
in1 = -in1;
|
||||
in2 = -in2;
|
||||
in3 = -in3;
|
||||
in4 = -in4;
|
||||
|
||||
/* store the result to destination */
|
||||
*pDst = in1;
|
||||
*(pDst + 1) = in2;
|
||||
*(pDst + 2) = in3;
|
||||
*(pDst + 3) = in4;
|
||||
|
||||
/* update pointers to process next samples */
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = -A */
|
||||
/* Negate and then store the results in the destination buffer. */
|
||||
*pDst++ = -*pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of negate group
|
||||
*/
|
|
@ -1,137 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_negate_q15.c
|
||||
*
|
||||
* Description: Negates Q15 vectors.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup negate
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Negates the elements of a Q15 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* \par Conditions for optimum performance
|
||||
* Input and output buffers should be aligned by 32-bit
|
||||
*
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
|
||||
*/
|
||||
|
||||
void arm_negate_q15(
|
||||
q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q15_t in;
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q31_t in1, in2; /* Temporary variables */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = -A */
|
||||
/* Read two inputs at a time */
|
||||
in1 = _SIMD32_OFFSET(pSrc);
|
||||
in2 = _SIMD32_OFFSET(pSrc + 2);
|
||||
|
||||
/* negate two samples at a time */
|
||||
in1 = __QSUB16(0, in1);
|
||||
|
||||
/* negate two samples at a time */
|
||||
in2 = __QSUB16(0, in2);
|
||||
|
||||
/* store the result to destination 2 samples at a time */
|
||||
_SIMD32_OFFSET(pDst) = in1;
|
||||
/* store the result to destination 2 samples at a time */
|
||||
_SIMD32_OFFSET(pDst + 2) = in2;
|
||||
|
||||
|
||||
/* update pointers to process next samples */
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = -A */
|
||||
/* Negate and then store the result in the destination buffer. */
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == (q15_t) 0x8000) ? 0x7fff : -in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of negate group
|
||||
*/
|
|
@ -1,124 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_negate_q31.c
|
||||
*
|
||||
* Description: Negates Q31 vectors.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup negate
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Negates the elements of a Q31 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
|
||||
*/
|
||||
|
||||
void arm_negate_q31(
|
||||
q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
q31_t in; /* Temporary variable */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t in1, in2, in3, in4;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = -A */
|
||||
/* Negate and then store the results in the destination buffer. */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
in3 = *pSrc++;
|
||||
in4 = *pSrc++;
|
||||
|
||||
*pDst++ = __QSUB(0, in1);
|
||||
*pDst++ = __QSUB(0, in2);
|
||||
*pDst++ = __QSUB(0, in3);
|
||||
*pDst++ = __QSUB(0, in4);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = -A */
|
||||
/* Negate and then store the result in the destination buffer. */
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == 0x80000000) ? 0x7fffffff : -in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of negate group
|
||||
*/
|
|
@ -1,120 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_negate_q7.c
|
||||
*
|
||||
* Description: Negates Q7 vectors.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup negate
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Negates the elements of a Q7 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.
|
||||
*/
|
||||
|
||||
void arm_negate_q7(
|
||||
q7_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q7_t in;
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t input; /* Input values1-4 */
|
||||
q31_t zero = 0x00000000;
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = -A */
|
||||
/* Read four inputs */
|
||||
input = *__SIMD32(pSrc)++;
|
||||
|
||||
/* Store the Negated results in the destination buffer in a single cycle by packing the results */
|
||||
*__SIMD32(pDst)++ = __QSUB8(zero, input);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = -A */
|
||||
/* Negate and then store the results in the destination buffer. */ \
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == (q7_t) 0x80) ? 0x7f : -in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of negate group
|
||||
*/
|
|
@ -1,158 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_offset_f32.c
|
||||
*
|
||||
* Description: Floating-point vector offset.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* ---------------------------------------------------------------------------- */
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup offset Vector Offset
|
||||
*
|
||||
* Adds a constant offset to each element of a vector.
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = pSrc[n] + offset, 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup offset
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Adds a constant offset to a floating-point vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] offset is the offset to be added
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
|
||||
void arm_offset_f32(
|
||||
float32_t * pSrc,
|
||||
float32_t offset,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t in1, in2, in3, in4;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the results in the destination buffer. */
|
||||
/* read samples from source */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
|
||||
/* add offset to input */
|
||||
in1 = in1 + offset;
|
||||
|
||||
/* read samples from source */
|
||||
in3 = *(pSrc + 2);
|
||||
|
||||
/* add offset to input */
|
||||
in2 = in2 + offset;
|
||||
|
||||
/* read samples from source */
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
/* add offset to input */
|
||||
in3 = in3 + offset;
|
||||
|
||||
/* store result to destination */
|
||||
*pDst = in1;
|
||||
|
||||
/* add offset to input */
|
||||
in4 = in4 + offset;
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 1) = in2;
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 2) = in3;
|
||||
|
||||
/* store result to destination */
|
||||
*(pDst + 3) = in4;
|
||||
|
||||
/* update pointers to process next samples */
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the result in the destination buffer. */
|
||||
*pDst++ = (*pSrc++) + offset;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of offset group
|
||||
*/
|
|
@ -1,131 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_offset_q15.c
|
||||
*
|
||||
* Description: Q15 vector offset.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup offset
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Adds a constant offset to a Q15 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] offset is the offset to be added
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
|
||||
*/
|
||||
|
||||
void arm_offset_q15(
|
||||
q15_t * pSrc,
|
||||
q15_t offset,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t offset_packed; /* Offset packed to 32 bit */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
|
||||
offset_packed = __PKHBT(offset, offset, 16);
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the results in the destination buffer, 2 samples at a time. */
|
||||
*__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrc)++, offset_packed);
|
||||
*__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrc)++, offset_packed);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the results in the destination buffer. */
|
||||
*pDst++ = (q15_t) __QADD16(*pSrc++, offset);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the results in the destination buffer. */
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) * pSrc++ + offset), 16);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of offset group
|
||||
*/
|
|
@ -1,135 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_offset_q31.c
|
||||
*
|
||||
* Description: Q31 vector offset.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup offset
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Adds a constant offset to a Q31 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] offset is the offset to be added
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
|
||||
*/
|
||||
|
||||
void arm_offset_q31(
|
||||
q31_t * pSrc,
|
||||
q31_t offset,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t in1, in2, in3, in4;
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the results in the destination buffer. */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
in3 = *pSrc++;
|
||||
in4 = *pSrc++;
|
||||
|
||||
*pDst++ = __QADD(in1, offset);
|
||||
*pDst++ = __QADD(in2, offset);
|
||||
*pDst++ = __QADD(in3, offset);
|
||||
*pDst++ = __QADD(in4, offset);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the result in the destination buffer. */
|
||||
*pDst++ = __QADD(*pSrc++, offset);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the result in the destination buffer. */
|
||||
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of offset group
|
||||
*/
|
|
@ -1,130 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_offset_q7.c
|
||||
*
|
||||
* Description: Q7 vector offset.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup offset
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Adds a constant offset to a Q7 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] offset is the offset to be added
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
|
||||
*/
|
||||
|
||||
void arm_offset_q7(
|
||||
q7_t * pSrc,
|
||||
q7_t offset,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t offset_packed; /* Offset packed to 32 bit */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
|
||||
offset_packed = __PACKq7(offset, offset, offset, offset);
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the results in the destination bufferfor 4 samples at a time. */
|
||||
*__SIMD32(pDst)++ = __QADD8(*__SIMD32(pSrc)++, offset_packed);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the result in the destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A + offset */
|
||||
/* Add offset and then store the result in the destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT((q15_t) * pSrc++ + offset, 8);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of offset group
|
||||
*/
|
|
@ -1,161 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_scale_f32.c
|
||||
*
|
||||
* Description: Multiplies a floating-point vector by a scalar.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup scale Vector Scale
|
||||
*
|
||||
* Multiply a vector by a scalar value. For floating-point data, the algorithm used is:
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = pSrc[n] * scale, 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* In the fixed-point Q7, Q15, and Q31 functions, <code>scale</code> is represented by
|
||||
* a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.
|
||||
* The shift allows the gain of the scaling operation to exceed 1.0.
|
||||
* The algorithm used with fixed-point data is:
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = (pSrc[n] * scaleFract) << shift, 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* The overall scale factor applied to the fixed-point data is
|
||||
* <pre>
|
||||
* scale = scaleFract * 2^shift.
|
||||
* </pre>
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup scale
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Multiplies a floating-point vector by a scalar.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] scale scale factor to be applied
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
|
||||
void arm_scale_f32(
|
||||
float32_t * pSrc,
|
||||
float32_t scale,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t in1, in2, in3, in4; /* temporary variabels */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * scale */
|
||||
/* Scale the input and then store the results in the destination buffer. */
|
||||
/* read input samples from source */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
|
||||
/* multiply with scaling factor */
|
||||
in1 = in1 * scale;
|
||||
|
||||
/* read input sample from source */
|
||||
in3 = *(pSrc + 2);
|
||||
|
||||
/* multiply with scaling factor */
|
||||
in2 = in2 * scale;
|
||||
|
||||
/* read input sample from source */
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
/* multiply with scaling factor */
|
||||
in3 = in3 * scale;
|
||||
in4 = in4 * scale;
|
||||
/* store the result to destination */
|
||||
*pDst = in1;
|
||||
*(pDst + 1) = in2;
|
||||
*(pDst + 2) = in3;
|
||||
*(pDst + 3) = in4;
|
||||
|
||||
/* update pointers to process next samples */
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * scale */
|
||||
/* Scale the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (*pSrc++) * scale;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of scale group
|
||||
*/
|
|
@ -1,157 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_scale_q15.c
|
||||
*
|
||||
* Description: Multiplies a Q15 vector by a scalar.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup scale
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Multiplies a Q15 vector by a scalar.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] scaleFract fractional portion of the scale value
|
||||
* @param[in] shift number of bits to shift the result by
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.15 format.
|
||||
* These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to 1.15 format.
|
||||
*/
|
||||
|
||||
|
||||
void arm_scale_q15(
|
||||
q15_t * pSrc,
|
||||
q15_t scaleFract,
|
||||
int8_t shift,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
int8_t kShift = 15 - shift; /* shift to apply after scaling */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q15_t in1, in2, in3, in4;
|
||||
q31_t inA1, inA2; /* Temporary variables */
|
||||
q31_t out1, out2, out3, out4;
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Reading 2 inputs from memory */
|
||||
inA1 = *__SIMD32(pSrc)++;
|
||||
inA2 = *__SIMD32(pSrc)++;
|
||||
|
||||
/* C = A * scale */
|
||||
/* Scale the inputs and then store the 2 results in the destination buffer
|
||||
* in single cycle by packing the outputs */
|
||||
out1 = (q31_t) ((q15_t) (inA1 >> 16) * scaleFract);
|
||||
out2 = (q31_t) ((q15_t) inA1 * scaleFract);
|
||||
out3 = (q31_t) ((q15_t) (inA2 >> 16) * scaleFract);
|
||||
out4 = (q31_t) ((q15_t) inA2 * scaleFract);
|
||||
|
||||
/* apply shifting */
|
||||
out1 = out1 >> kShift;
|
||||
out2 = out2 >> kShift;
|
||||
out3 = out3 >> kShift;
|
||||
out4 = out4 >> kShift;
|
||||
|
||||
/* saturate the output */
|
||||
in1 = (q15_t) (__SSAT(out1, 16));
|
||||
in2 = (q15_t) (__SSAT(out2, 16));
|
||||
in3 = (q15_t) (__SSAT(out3, 16));
|
||||
in4 = (q15_t) (__SSAT(out4, 16));
|
||||
|
||||
/* store the result to destination */
|
||||
*__SIMD32(pDst)++ = __PKHBT(in2, in1, 16);
|
||||
*__SIMD32(pDst)++ = __PKHBT(in4, in3, 16);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * scale */
|
||||
/* Scale the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (q15_t) (__SSAT(((*pSrc++) * scaleFract) >> kShift, 16));
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * scale */
|
||||
/* Scale the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (q15_t) (__SSAT(((q31_t) * pSrc++ * scaleFract) >> kShift, 16));
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of scale group
|
||||
*/
|
|
@ -1,221 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_scale_q31.c
|
||||
*
|
||||
* Description: Multiplies a Q31 vector by a scalar.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup scale
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Multiplies a Q31 vector by a scalar.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] scaleFract fractional portion of the scale value
|
||||
* @param[in] shift number of bits to shift the result by
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format.
|
||||
* These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to 1.31 format.
|
||||
*/
|
||||
|
||||
void arm_scale_q31(
|
||||
q31_t * pSrc,
|
||||
q31_t scaleFract,
|
||||
int8_t shift,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
int8_t kShift = shift + 1; /* Shift to apply after scaling */
|
||||
int8_t sign = (kShift & 0x80);
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q31_t in, out;
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q31_t in1, in2, in3, in4; /* temporary input variables */
|
||||
q31_t out1, out2, out3, out4; /* temporary output variabels */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
if(sign == 0u)
|
||||
{
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* read four inputs from source */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
in3 = *(pSrc + 2);
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
/* multiply input with scaler value */
|
||||
in1 = ((q63_t) in1 * scaleFract) >> 32;
|
||||
in2 = ((q63_t) in2 * scaleFract) >> 32;
|
||||
in3 = ((q63_t) in3 * scaleFract) >> 32;
|
||||
in4 = ((q63_t) in4 * scaleFract) >> 32;
|
||||
|
||||
/* apply shifting */
|
||||
out1 = in1 << kShift;
|
||||
out2 = in2 << kShift;
|
||||
|
||||
/* saturate the results. */
|
||||
if(in1 != (out1 >> kShift))
|
||||
out1 = 0x7FFFFFFF ^ (in1 >> 31);
|
||||
|
||||
if(in2 != (out2 >> kShift))
|
||||
out2 = 0x7FFFFFFF ^ (in2 >> 31);
|
||||
|
||||
out3 = in3 << kShift;
|
||||
out4 = in4 << kShift;
|
||||
|
||||
*pDst = out1;
|
||||
*(pDst + 1) = out2;
|
||||
|
||||
if(in3 != (out3 >> kShift))
|
||||
out3 = 0x7FFFFFFF ^ (in3 >> 31);
|
||||
|
||||
if(in4 != (out4 >> kShift))
|
||||
out4 = 0x7FFFFFFF ^ (in4 >> 31);
|
||||
|
||||
/* Store result destination */
|
||||
*(pDst + 2) = out3;
|
||||
*(pDst + 3) = out4;
|
||||
|
||||
/* Update pointers to process next sampels */
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
kShift = -kShift;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* read four inputs from source */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
in3 = *(pSrc + 2);
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
/* multiply input with scaler value */
|
||||
in1 = ((q63_t) in1 * scaleFract) >> 32;
|
||||
in2 = ((q63_t) in2 * scaleFract) >> 32;
|
||||
in3 = ((q63_t) in3 * scaleFract) >> 32;
|
||||
in4 = ((q63_t) in4 * scaleFract) >> 32;
|
||||
|
||||
/* apply shifting */
|
||||
out1 = in1 >> kShift;
|
||||
out2 = in2 >> kShift;
|
||||
|
||||
out3 = in3 >> kShift;
|
||||
out4 = in4 >> kShift;
|
||||
|
||||
/* Store result destination */
|
||||
*pDst = out1;
|
||||
*(pDst + 1) = out2;
|
||||
|
||||
*(pDst + 2) = out3;
|
||||
*(pDst + 3) = out4;
|
||||
|
||||
/* Update pointers to process next sampels */
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * scale */
|
||||
/* Scale the input and then store the result in the destination buffer. */
|
||||
in = *pSrc++;
|
||||
in = ((q63_t) in * scaleFract) >> 32;
|
||||
|
||||
if(sign == 0)
|
||||
{
|
||||
out = in << kShift;
|
||||
if(in != (out >> kShift))
|
||||
out = 0x7FFFFFFF ^ (in >> 31);
|
||||
}
|
||||
else
|
||||
{
|
||||
out = in >> kShift;
|
||||
}
|
||||
|
||||
*pDst++ = out;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of scale group
|
||||
*/
|
|
@ -1,144 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_scale_q7.c
|
||||
*
|
||||
* Description: Multiplies a Q7 vector by a scalar.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup scale
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Multiplies a Q7 vector by a scalar.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] scaleFract fractional portion of the scale value
|
||||
* @param[in] shift number of bits to shift the result by
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.7 format.
|
||||
* These are multiplied to yield a 2.14 intermediate result and this is shifted with saturation to 1.7 format.
|
||||
*/
|
||||
|
||||
void arm_scale_q7(
|
||||
q7_t * pSrc,
|
||||
q7_t scaleFract,
|
||||
int8_t shift,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
int8_t kShift = 7 - shift; /* shift to apply after scaling */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q7_t in1, in2, in3, in4, out1, out2, out3, out4; /* Temporary variables to store input & output */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Reading 4 inputs from memory */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
in3 = *pSrc++;
|
||||
in4 = *pSrc++;
|
||||
|
||||
/* C = A * scale */
|
||||
/* Scale the inputs and then store the results in the temporary variables. */
|
||||
out1 = (q7_t) (__SSAT(((in1) * scaleFract) >> kShift, 8));
|
||||
out2 = (q7_t) (__SSAT(((in2) * scaleFract) >> kShift, 8));
|
||||
out3 = (q7_t) (__SSAT(((in3) * scaleFract) >> kShift, 8));
|
||||
out4 = (q7_t) (__SSAT(((in4) * scaleFract) >> kShift, 8));
|
||||
|
||||
/* Packing the individual outputs into 32bit and storing in
|
||||
* destination buffer in single write */
|
||||
*__SIMD32(pDst)++ = __PACKq7(out1, out2, out3, out4);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * scale */
|
||||
/* Scale the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (q7_t) (__SSAT(((*pSrc++) * scaleFract) >> kShift, 8));
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A * scale */
|
||||
/* Scale the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (q7_t) (__SSAT((((q15_t) * pSrc++ * scaleFract) >> kShift), 8));
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of scale group
|
||||
*/
|
|
@ -1,243 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_shift_q15.c
|
||||
*
|
||||
* Description: Shifts the elements of a Q15 vector by a specified number of bits.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup shift
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Shifts the elements of a Q15 vector a specified number of bits.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_shift_q15(
|
||||
q15_t * pSrc,
|
||||
int8_t shiftBits,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
uint8_t sign; /* Sign of shiftBits */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q15_t in1, in2; /* Temporary variables */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* Getting the sign of shiftBits */
|
||||
sign = (shiftBits & 0x80);
|
||||
|
||||
/* If the shift value is positive then do right shift else left shift */
|
||||
if(sign == 0u)
|
||||
{
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Read 2 inputs */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
/* C = A << shiftBits */
|
||||
/* Shift the inputs and then store the results in the destination buffer. */
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in1 << shiftBits), 16),
|
||||
__SSAT((in2 << shiftBits), 16), 16);
|
||||
|
||||
#else
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in2 << shiftBits), 16),
|
||||
__SSAT((in1 << shiftBits), 16), 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in1 << shiftBits), 16),
|
||||
__SSAT((in2 << shiftBits), 16), 16);
|
||||
|
||||
#else
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in2 << shiftBits), 16),
|
||||
__SSAT((in1 << shiftBits), 16), 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
/* Shift and then store the results in the destination buffer. */
|
||||
*pDst++ = __SSAT((*pSrc++ << shiftBits), 16);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Read 2 inputs */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
|
||||
/* C = A >> shiftBits */
|
||||
/* Shift the inputs and then store the results in the destination buffer. */
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT((in1 >> -shiftBits),
|
||||
(in2 >> -shiftBits), 16);
|
||||
|
||||
#else
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT((in2 >> -shiftBits),
|
||||
(in1 >> -shiftBits), 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT((in1 >> -shiftBits),
|
||||
(in2 >> -shiftBits), 16);
|
||||
|
||||
#else
|
||||
|
||||
*__SIMD32(pDst)++ = __PKHBT((in2 >> -shiftBits),
|
||||
(in1 >> -shiftBits), 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
/* Shift the inputs and then store the results in the destination buffer. */
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Getting the sign of shiftBits */
|
||||
sign = (shiftBits & 0x80);
|
||||
|
||||
/* If the shift value is positive then do right shift else left shift */
|
||||
if(sign == 0u)
|
||||
{
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
/* Shift and then store the results in the destination buffer. */
|
||||
*pDst++ = __SSAT(((q31_t) * pSrc++ << shiftBits), 16);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
/* Shift the inputs and then store the results in the destination buffer. */
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of shift group
|
||||
*/
|
|
@ -1,195 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_shift_q31.c
|
||||
*
|
||||
* Description: Shifts the elements of a Q31 vector by a specified number of bits.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
/**
|
||||
* @defgroup shift Vector Shift
|
||||
*
|
||||
* Shifts the elements of a fixed-point vector by a specified number of bits.
|
||||
* There are separate functions for Q7, Q15, and Q31 data types.
|
||||
* The underlying algorithm used is:
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = pSrc[n] << shift, 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* If <code>shift</code> is positive then the elements of the vector are shifted to the left.
|
||||
* If <code>shift</code> is negative then the elements of the vector are shifted to the right.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup shift
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Shifts the elements of a Q31 vector a specified number of bits.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_shift_q31(
|
||||
q31_t * pSrc,
|
||||
int8_t shiftBits,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
uint8_t sign = (shiftBits & 0x80); /* Sign of shiftBits */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
q31_t in1, in2, in3, in4; /* Temporary input variables */
|
||||
q31_t out1, out2, out3, out4; /* Temporary output variables */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
|
||||
if(sign == 0u)
|
||||
{
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
/* Shift the input and then store the results in the destination buffer. */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
out1 = in1 << shiftBits;
|
||||
in3 = *(pSrc + 2);
|
||||
out2 = in2 << shiftBits;
|
||||
in4 = *(pSrc + 3);
|
||||
if(in1 != (out1 >> shiftBits))
|
||||
out1 = 0x7FFFFFFF ^ (in1 >> 31);
|
||||
|
||||
if(in2 != (out2 >> shiftBits))
|
||||
out2 = 0x7FFFFFFF ^ (in2 >> 31);
|
||||
|
||||
*pDst = out1;
|
||||
out3 = in3 << shiftBits;
|
||||
*(pDst + 1) = out2;
|
||||
out4 = in4 << shiftBits;
|
||||
|
||||
if(in3 != (out3 >> shiftBits))
|
||||
out3 = 0x7FFFFFFF ^ (in3 >> 31);
|
||||
|
||||
if(in4 != (out4 >> shiftBits))
|
||||
out4 = 0x7FFFFFFF ^ (in4 >> 31);
|
||||
|
||||
*(pDst + 2) = out3;
|
||||
*(pDst + 3) = out4;
|
||||
|
||||
/* Update destination pointer to process next sampels */
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
/* Shift the input and then store the results in the destination buffer. */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
in3 = *(pSrc + 2);
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
*pDst = (in1 >> -shiftBits);
|
||||
*(pDst + 1) = (in2 >> -shiftBits);
|
||||
*(pDst + 2) = (in3 >> -shiftBits);
|
||||
*(pDst + 3) = (in4 >> -shiftBits);
|
||||
|
||||
|
||||
pSrc += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A (>> or <<) shiftBits */
|
||||
/* Shift the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (sign == 0u) ? clip_q63_to_q31((q63_t) * pSrc++ << shiftBits) :
|
||||
(*pSrc++ >> -shiftBits);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of shift group
|
||||
*/
|
|
@ -1,215 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_shift_q7.c
|
||||
*
|
||||
* Description: Processing function for the Q7 Shifting
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup shift
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Shifts the elements of a Q7 vector a specified number of bits.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in the vector
|
||||
* @return none.
|
||||
*
|
||||
* \par Conditions for optimum performance
|
||||
* Input and output buffers should be aligned by 32-bit
|
||||
*
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q7 range [0x8 0x7F] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_shift_q7(
|
||||
q7_t * pSrc,
|
||||
int8_t shiftBits,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
uint8_t sign; /* Sign of shiftBits */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q7_t in1; /* Input value1 */
|
||||
q7_t in2; /* Input value2 */
|
||||
q7_t in3; /* Input value3 */
|
||||
q7_t in4; /* Input value4 */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* Getting the sign of shiftBits */
|
||||
sign = (shiftBits & 0x80);
|
||||
|
||||
/* If the shift value is positive then do right shift else left shift */
|
||||
if(sign == 0u)
|
||||
{
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
/* Read 4 inputs */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
in3 = *(pSrc + 2);
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
/* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
|
||||
*__SIMD32(pDst)++ = __PACKq7(__SSAT((in1 << shiftBits), 8),
|
||||
__SSAT((in2 << shiftBits), 8),
|
||||
__SSAT((in3 << shiftBits), 8),
|
||||
__SSAT((in4 << shiftBits), 8));
|
||||
/* Update source pointer to process next sampels */
|
||||
pSrc += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
/* Shift the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT((*pSrc++ << shiftBits), 8);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
shiftBits = -shiftBits;
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
/* Read 4 inputs */
|
||||
in1 = *pSrc;
|
||||
in2 = *(pSrc + 1);
|
||||
in3 = *(pSrc + 2);
|
||||
in4 = *(pSrc + 3);
|
||||
|
||||
/* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
|
||||
*__SIMD32(pDst)++ = __PACKq7((in1 >> shiftBits), (in2 >> shiftBits),
|
||||
(in3 >> shiftBits), (in4 >> shiftBits));
|
||||
|
||||
|
||||
pSrc += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
/* Shift the input and then store the result in the destination buffer. */
|
||||
in1 = *pSrc++;
|
||||
*pDst++ = (in1 >> shiftBits);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Getting the sign of shiftBits */
|
||||
sign = (shiftBits & 0x80);
|
||||
|
||||
/* If the shift value is positive then do right shift else left shift */
|
||||
if(sign == 0u)
|
||||
{
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A << shiftBits */
|
||||
/* Shift the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT(((q15_t) * pSrc++ << shiftBits), 8);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A >> shiftBits */
|
||||
/* Shift the input and then store the result in the destination buffer. */
|
||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of shift group
|
||||
*/
|
|
@ -1,145 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sub_f32.c
|
||||
*
|
||||
* Description: Floating-point vector subtraction.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup BasicSub Vector Subtraction
|
||||
*
|
||||
* Element-by-element subtraction of two vectors.
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = pSrcA[n] - pSrcB[n], 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicSub
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Floating-point vector subtraction.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_sub_f32(
|
||||
float32_t * pSrcA,
|
||||
float32_t * pSrcB,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t inA1, inA2, inA3, inA4; /* temporary variables */
|
||||
float32_t inB1, inB2, inB3, inB4; /* temporary variables */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the results in the destination buffer. */
|
||||
/* Read 4 input samples from sourceA and sourceB */
|
||||
inA1 = *pSrcA;
|
||||
inB1 = *pSrcB;
|
||||
inA2 = *(pSrcA + 1);
|
||||
inB2 = *(pSrcB + 1);
|
||||
inA3 = *(pSrcA + 2);
|
||||
inB3 = *(pSrcB + 2);
|
||||
inA4 = *(pSrcA + 3);
|
||||
inB4 = *(pSrcB + 3);
|
||||
|
||||
/* dst = srcA - srcB */
|
||||
/* subtract and store the result */
|
||||
*pDst = inA1 - inB1;
|
||||
*(pDst + 1) = inA2 - inB2;
|
||||
*(pDst + 2) = inA3 - inB3;
|
||||
*(pDst + 3) = inA4 - inB4;
|
||||
|
||||
|
||||
/* Update pointers to process next sampels */
|
||||
pSrcA += 4u;
|
||||
pSrcB += 4u;
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the results in the destination buffer. */
|
||||
*pDst++ = (*pSrcA++) - (*pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicSub group
|
||||
*/
|
|
@ -1,135 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sub_q15.c
|
||||
*
|
||||
* Description: Q15 vector subtraction.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicSub
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q15 vector subtraction.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_sub_q15(
|
||||
q15_t * pSrcA,
|
||||
q15_t * pSrcB,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t inA1, inA2;
|
||||
q31_t inB1, inB2;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the results in the destination buffer two samples at a time. */
|
||||
inA1 = *__SIMD32(pSrcA)++;
|
||||
inA2 = *__SIMD32(pSrcA)++;
|
||||
inB1 = *__SIMD32(pSrcB)++;
|
||||
inB2 = *__SIMD32(pSrcB)++;
|
||||
|
||||
*__SIMD32(pDst)++ = __QSUB16(inA1, inB1);
|
||||
*__SIMD32(pDst)++ = __QSUB16(inA2, inB2);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the result in the destination buffer. */
|
||||
*pDst++ = (q15_t) __QSUB16(*pSrcA++, *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the result in the destination buffer. */
|
||||
*pDst++ = (q15_t) __SSAT(((q31_t) * pSrcA++ - *pSrcB++), 16);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicSub group
|
||||
*/
|
|
@ -1,141 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sub_q31.c
|
||||
*
|
||||
* Description: Q31 vector subtraction.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicSub
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q31 vector subtraction.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_sub_q31(
|
||||
q31_t * pSrcA,
|
||||
q31_t * pSrcB,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t inA1, inA2, inA3, inA4;
|
||||
q31_t inB1, inB2, inB3, inB4;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the results in the destination buffer. */
|
||||
inA1 = *pSrcA++;
|
||||
inA2 = *pSrcA++;
|
||||
inB1 = *pSrcB++;
|
||||
inB2 = *pSrcB++;
|
||||
|
||||
inA3 = *pSrcA++;
|
||||
inA4 = *pSrcA++;
|
||||
inB3 = *pSrcB++;
|
||||
inB4 = *pSrcB++;
|
||||
|
||||
*pDst++ = __QSUB(inA1, inB1);
|
||||
*pDst++ = __QSUB(inA2, inB2);
|
||||
*pDst++ = __QSUB(inA3, inB3);
|
||||
*pDst++ = __QSUB(inA4, inB4);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the result in the destination buffer. */
|
||||
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the result in the destination buffer. */
|
||||
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrcA++ - *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicSub group
|
||||
*/
|
|
@ -1,126 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sub_q7.c
|
||||
*
|
||||
* Description: Q7 vector subtraction.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BasicSub
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q7 vector subtraction.
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] blockSize number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_sub_q7(
|
||||
q7_t * pSrcA,
|
||||
q7_t * pSrcB,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the results in the destination buffer 4 samples at a time. */
|
||||
*__SIMD32(pDst)++ = __QSUB8(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the result in the destination buffer. */
|
||||
*pDst++ = __SSAT(*pSrcA++ - *pSrcB++, 8);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A - B */
|
||||
/* Subtract and then store the result in the destination buffer. */
|
||||
*pDst++ = (q7_t) __SSAT((q15_t) * pSrcA++ - *pSrcB++, 8);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicSub group
|
||||
*/
|
File diff suppressed because it is too large
Load Diff
|
@ -1,174 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_conj_f32.c
|
||||
*
|
||||
* Description: Floating-point complex conjugate.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup cmplx_conj Complex Conjugate
|
||||
*
|
||||
* Conjugates the elements of a complex data vector.
|
||||
*
|
||||
* The <code>pSrc</code> points to the source data and
|
||||
* <code>pDst</code> points to the where the result should be written.
|
||||
* <code>numSamples</code> specifies the number of complex samples
|
||||
* and the data in each array is stored in an interleaved fashion
|
||||
* (real, imag, real, imag, ...).
|
||||
* Each array has a total of <code>2*numSamples</code> values.
|
||||
* The underlying algorithm is used:
|
||||
*
|
||||
* <pre>
|
||||
* for(n=0; n<numSamples; n++) {
|
||||
* pDst[(2*n)+0)] = pSrc[(2*n)+0]; // real part
|
||||
* pDst[(2*n)+1)] = -pSrc[(2*n)+1]; // imag part
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_conj
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Floating-point complex conjugate.
|
||||
* @param *pSrc points to the input vector
|
||||
* @param *pDst points to the output vector
|
||||
* @param numSamples number of complex samples in each vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_cmplx_conj_f32(
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t inR1, inR2, inR3, inR4;
|
||||
float32_t inI1, inI2, inI3, inI4;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||
/* read real input samples */
|
||||
inR1 = pSrc[0];
|
||||
/* store real samples to destination */
|
||||
pDst[0] = inR1;
|
||||
inR2 = pSrc[2];
|
||||
pDst[2] = inR2;
|
||||
inR3 = pSrc[4];
|
||||
pDst[4] = inR3;
|
||||
inR4 = pSrc[6];
|
||||
pDst[6] = inR4;
|
||||
|
||||
/* read imaginary input samples */
|
||||
inI1 = pSrc[1];
|
||||
inI2 = pSrc[3];
|
||||
|
||||
/* conjugate input */
|
||||
inI1 = -inI1;
|
||||
|
||||
/* read imaginary input samples */
|
||||
inI3 = pSrc[5];
|
||||
|
||||
/* conjugate input */
|
||||
inI2 = -inI2;
|
||||
|
||||
/* read imaginary input samples */
|
||||
inI4 = pSrc[7];
|
||||
|
||||
/* conjugate input */
|
||||
inI3 = -inI3;
|
||||
|
||||
/* store imaginary samples to destination */
|
||||
pDst[1] = inI1;
|
||||
pDst[3] = inI2;
|
||||
|
||||
/* conjugate input */
|
||||
inI4 = -inI4;
|
||||
|
||||
/* store imaginary samples to destination */
|
||||
pDst[5] = inI3;
|
||||
|
||||
/* increment source pointer by 8 to process next sampels */
|
||||
pSrc += 8u;
|
||||
|
||||
/* store imaginary sample to destination */
|
||||
pDst[7] = inI4;
|
||||
|
||||
/* increment destination pointer by 8 to store next samples */
|
||||
pDst += 8u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
blkCnt = numSamples;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* realOut + j (imagOut) = realIn + j (-1) imagIn */
|
||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = -*pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_conj group
|
||||
*/
|
|
@ -1,153 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_conj_q15.c
|
||||
*
|
||||
* Description: Q15 complex conjugate.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_conj
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q15 complex conjugate.
|
||||
* @param *pSrc points to the input vector
|
||||
* @param *pDst points to the output vector
|
||||
* @param numSamples number of complex samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
|
||||
*/
|
||||
|
||||
void arm_cmplx_conj_q15(
|
||||
q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q31_t in1, in2, in3, in4;
|
||||
q31_t zero = 0;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||
in1 = *__SIMD32(pSrc)++;
|
||||
in2 = *__SIMD32(pSrc)++;
|
||||
in3 = *__SIMD32(pSrc)++;
|
||||
in4 = *__SIMD32(pSrc)++;
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
in1 = __QASX(zero, in1);
|
||||
in2 = __QASX(zero, in2);
|
||||
in3 = __QASX(zero, in3);
|
||||
in4 = __QASX(zero, in4);
|
||||
|
||||
#else
|
||||
|
||||
in1 = __QSAX(zero, in1);
|
||||
in2 = __QSAX(zero, in2);
|
||||
in3 = __QSAX(zero, in3);
|
||||
in4 = __QSAX(zero, in4);
|
||||
|
||||
#endif // #ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
in1 = ((uint32_t) in1 >> 16) | ((uint32_t) in1 << 16);
|
||||
in2 = ((uint32_t) in2 >> 16) | ((uint32_t) in2 << 16);
|
||||
in3 = ((uint32_t) in3 >> 16) | ((uint32_t) in3 << 16);
|
||||
in4 = ((uint32_t) in4 >> 16) | ((uint32_t) in4 << 16);
|
||||
|
||||
*__SIMD32(pDst)++ = in1;
|
||||
*__SIMD32(pDst)++ = in2;
|
||||
*__SIMD32(pDst)++ = in3;
|
||||
*__SIMD32(pDst)++ = in4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = __SSAT(-*pSrc++, 16);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
q15_t in;
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
while(numSamples > 0u)
|
||||
{
|
||||
/* realOut + j (imagOut) = realIn+ j (-1) imagIn */
|
||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||
*pDst++ = *pSrc++;
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == (q15_t) 0x8000) ? 0x7fff : -in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
numSamples--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_conj group
|
||||
*/
|
|
@ -1,172 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_conj_q31.c
|
||||
*
|
||||
* Description: Q31 complex conjugate.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_conj
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q31 complex conjugate.
|
||||
* @param *pSrc points to the input vector
|
||||
* @param *pDst points to the output vector
|
||||
* @param numSamples number of complex samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
|
||||
*/
|
||||
|
||||
void arm_cmplx_conj_q31(
|
||||
q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q31_t in; /* Input value */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t inR1, inR2, inR3, inR4; /* Temporary real variables */
|
||||
q31_t inI1, inI2, inI3, inI4; /* Temporary imaginary variables */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||
/* Saturated to 0x7fffffff if the input is -1(0x80000000) */
|
||||
/* read real input sample */
|
||||
inR1 = pSrc[0];
|
||||
/* store real input sample */
|
||||
pDst[0] = inR1;
|
||||
|
||||
/* read imaginary input sample */
|
||||
inI1 = pSrc[1];
|
||||
|
||||
/* read real input sample */
|
||||
inR2 = pSrc[2];
|
||||
/* store real input sample */
|
||||
pDst[2] = inR2;
|
||||
|
||||
/* read imaginary input sample */
|
||||
inI2 = pSrc[3];
|
||||
|
||||
/* negate imaginary input sample */
|
||||
inI1 = __QSUB(0, inI1);
|
||||
|
||||
/* read real input sample */
|
||||
inR3 = pSrc[4];
|
||||
/* store real input sample */
|
||||
pDst[4] = inR3;
|
||||
|
||||
/* read imaginary input sample */
|
||||
inI3 = pSrc[5];
|
||||
|
||||
/* negate imaginary input sample */
|
||||
inI2 = __QSUB(0, inI2);
|
||||
|
||||
/* read real input sample */
|
||||
inR4 = pSrc[6];
|
||||
/* store real input sample */
|
||||
pDst[6] = inR4;
|
||||
|
||||
/* negate imaginary input sample */
|
||||
inI3 = __QSUB(0, inI3);
|
||||
|
||||
/* store imaginary input sample */
|
||||
inI4 = pSrc[7];
|
||||
|
||||
/* store imaginary input samples */
|
||||
pDst[1] = inI1;
|
||||
|
||||
/* negate imaginary input sample */
|
||||
inI4 = __QSUB(0, inI4);
|
||||
|
||||
/* store imaginary input samples */
|
||||
pDst[3] = inI2;
|
||||
|
||||
/* increment source pointer by 8 to proecess next samples */
|
||||
pSrc += 8u;
|
||||
|
||||
/* store imaginary input samples */
|
||||
pDst[5] = inI3;
|
||||
pDst[7] = inI4;
|
||||
|
||||
/* increment destination pointer by 8 to process next samples */
|
||||
pDst += 8u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
blkCnt = numSamples;
|
||||
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||
/* Saturated to 0x7fffffff if the input is -1(0x80000000) */
|
||||
*pDst++ = *pSrc++;
|
||||
in = *pSrc++;
|
||||
*pDst++ = (in == 0x80000000) ? 0x7fffffff : -in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_conj group
|
||||
*/
|
|
@ -1,160 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_dot_prod_f32.c
|
||||
*
|
||||
* Description: Floating-point complex dot product
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup cmplx_dot_prod Complex Dot Product
|
||||
*
|
||||
* Computes the dot product of two complex vectors.
|
||||
* The vectors are multiplied element-by-element and then summed.
|
||||
*
|
||||
* The <code>pSrcA</code> points to the first complex input vector and
|
||||
* <code>pSrcB</code> points to the second complex input vector.
|
||||
* <code>numSamples</code> specifies the number of complex samples
|
||||
* and the data in each array is stored in an interleaved fashion
|
||||
* (real, imag, real, imag, ...).
|
||||
* Each array has a total of <code>2*numSamples</code> values.
|
||||
*
|
||||
* The underlying algorithm is used:
|
||||
* <pre>
|
||||
* realResult=0;
|
||||
* imagResult=0;
|
||||
* for(n=0; n<numSamples; n++) {
|
||||
* realResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+0] - pSrcA[(2*n)+1]*pSrcB[(2*n)+1];
|
||||
* imagResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+1] + pSrcA[(2*n)+1]*pSrcB[(2*n)+0];
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_dot_prod
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Floating-point complex dot product
|
||||
* @param *pSrcA points to the first input vector
|
||||
* @param *pSrcB points to the second input vector
|
||||
* @param numSamples number of complex samples in each vector
|
||||
* @param *realResult real part of the result returned here
|
||||
* @param *imagResult imaginary part of the result returned here
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_cmplx_dot_prod_f32(
|
||||
float32_t * pSrcA,
|
||||
float32_t * pSrcB,
|
||||
uint32_t numSamples,
|
||||
float32_t * realResult,
|
||||
float32_t * imagResult)
|
||||
{
|
||||
float32_t real_sum = 0.0f, imag_sum = 0.0f; /* Temporary result storage */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
|
||||
real_sum += (*pSrcA++) * (*pSrcB++);
|
||||
/* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
|
||||
imag_sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
real_sum += (*pSrcA++) * (*pSrcB++);
|
||||
imag_sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
real_sum += (*pSrcA++) * (*pSrcB++);
|
||||
imag_sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
real_sum += (*pSrcA++) * (*pSrcB++);
|
||||
imag_sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
|
||||
real_sum += (*pSrcA++) * (*pSrcB++);
|
||||
/* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
|
||||
imag_sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
while(numSamples > 0u)
|
||||
{
|
||||
/* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
|
||||
real_sum += (*pSrcA++) * (*pSrcB++);
|
||||
/* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
|
||||
imag_sum += (*pSrcA++) * (*pSrcB++);
|
||||
|
||||
|
||||
/* Decrement the loop counter */
|
||||
numSamples--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
/* Store the real and imaginary results in the destination buffers */
|
||||
*realResult = real_sum;
|
||||
*imagResult = imag_sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_dot_prod group
|
||||
*/
|
|
@ -1,144 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_dot_prod_q15.c
|
||||
*
|
||||
* Description: Processing function for the Q15 Complex Dot product
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_dot_prod
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q15 complex dot product
|
||||
* @param *pSrcA points to the first input vector
|
||||
* @param *pSrcB points to the second input vector
|
||||
* @param numSamples number of complex samples in each vector
|
||||
* @param *realResult real part of the result returned here
|
||||
* @param *imagResult imaginary part of the result returned here
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function is implemented using an internal 64-bit accumulator.
|
||||
* The intermediate 1.15 by 1.15 multiplications are performed with full precision and yield a 2.30 result.
|
||||
* These are accumulated in a 64-bit accumulator with 34.30 precision.
|
||||
* As a final step, the accumulators are converted to 8.24 format.
|
||||
* The return results <code>realResult</code> and <code>imagResult</code> are in 8.24 format.
|
||||
*/
|
||||
|
||||
void arm_cmplx_dot_prod_q15(
|
||||
q15_t * pSrcA,
|
||||
q15_t * pSrcB,
|
||||
uint32_t numSamples,
|
||||
q31_t * realResult,
|
||||
q31_t * imagResult)
|
||||
{
|
||||
q63_t real_sum = 0, imag_sum = 0; /* Temporary result storage */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
|
||||
real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
/* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
|
||||
imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
|
||||
real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
/* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
|
||||
imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
while(numSamples > 0u)
|
||||
{
|
||||
/* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
|
||||
real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
/* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
|
||||
imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
numSamples--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
/* Store the real and imaginary results in 8.24 format */
|
||||
/* Convert real data in 34.30 to 8.24 by 6 right shifts */
|
||||
*realResult = (q31_t) (real_sum) >> 6;
|
||||
/* Convert imaginary data in 34.30 to 8.24 by 6 right shifts */
|
||||
*imagResult = (q31_t) (imag_sum) >> 6;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_dot_prod group
|
||||
*/
|
|
@ -1,145 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_dot_prod_q31.c
|
||||
*
|
||||
* Description: Q31 complex dot product
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_dot_prod
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q31 complex dot product
|
||||
* @param *pSrcA points to the first input vector
|
||||
* @param *pSrcB points to the second input vector
|
||||
* @param numSamples number of complex samples in each vector
|
||||
* @param *realResult real part of the result returned here
|
||||
* @param *imagResult imaginary part of the result returned here
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function is implemented using an internal 64-bit accumulator.
|
||||
* The intermediate 1.31 by 1.31 multiplications are performed with 64-bit precision and then shifted to 16.48 format.
|
||||
* The internal real and imaginary accumulators are in 16.48 format and provide 15 guard bits.
|
||||
* Additions are nonsaturating and no overflow will occur as long as <code>numSamples</code> is less than 32768.
|
||||
* The return results <code>realResult</code> and <code>imagResult</code> are in 16.48 format.
|
||||
* Input down scaling is not required.
|
||||
*/
|
||||
|
||||
void arm_cmplx_dot_prod_q31(
|
||||
q31_t * pSrcA,
|
||||
q31_t * pSrcB,
|
||||
uint32_t numSamples,
|
||||
q63_t * realResult,
|
||||
q63_t * imagResult)
|
||||
{
|
||||
q63_t real_sum = 0, imag_sum = 0; /* Temporary result storage */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
|
||||
/* Convert real data in 2.62 to 16.48 by 14 right shifts */
|
||||
real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
/* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
|
||||
/* Convert imag data in 2.62 to 16.48 by 14 right shifts */
|
||||
imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
|
||||
real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
|
||||
real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
|
||||
real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
|
||||
real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
/* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
|
||||
imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
while(numSamples > 0u)
|
||||
{
|
||||
/* outReal = realA[0]* realB[0] + realA[2]* realB[2] + realA[4]* realB[4] + .....+ realA[numSamples-2]* realB[numSamples-2] */
|
||||
real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
/* outImag = imagA[1]* imagB[1] + imagA[3]* imagB[3] + imagA[5]* imagB[5] + .....+ imagA[numSamples-1]* imagB[numSamples-1] */
|
||||
imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
numSamples--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
/* Store the real and imaginary results in 16.48 format */
|
||||
*realResult = real_sum;
|
||||
*imagResult = imag_sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_dot_prod group
|
||||
*/
|
|
@ -1,157 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_mag_f32.c
|
||||
*
|
||||
* Description: Floating-point complex magnitude.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup cmplx_mag Complex Magnitude
|
||||
*
|
||||
* Computes the magnitude of the elements of a complex data vector.
|
||||
*
|
||||
* The <code>pSrc</code> points to the source data and
|
||||
* <code>pDst</code> points to the where the result should be written.
|
||||
* <code>numSamples</code> specifies the number of complex samples
|
||||
* in the input array and the data is stored in an interleaved fashion
|
||||
* (real, imag, real, imag, ...).
|
||||
* The input array has a total of <code>2*numSamples</code> values;
|
||||
* the output array has a total of <code>numSamples</code> values.
|
||||
* The underlying algorithm is used:
|
||||
*
|
||||
* <pre>
|
||||
* for(n=0; n<numSamples; n++) {
|
||||
* pDst[n] = sqrt(pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2);
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_mag
|
||||
* @{
|
||||
*/
|
||||
/**
|
||||
* @brief Floating-point complex magnitude.
|
||||
* @param[in] *pSrc points to complex input buffer
|
||||
* @param[out] *pDst points to real output buffer
|
||||
* @param[in] numSamples number of complex samples in the input vector
|
||||
* @return none.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
void arm_cmplx_mag_f32(
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
float32_t realIn, imagIn; /* Temporary variables to hold input values */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
|
||||
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
|
||||
realIn = *pSrc++;
|
||||
imagIn = *pSrc++;
|
||||
/* store the result in the destination buffer. */
|
||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||
|
||||
realIn = *pSrc++;
|
||||
imagIn = *pSrc++;
|
||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||
|
||||
realIn = *pSrc++;
|
||||
imagIn = *pSrc++;
|
||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||
|
||||
realIn = *pSrc++;
|
||||
imagIn = *pSrc++;
|
||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
|
||||
realIn = *pSrc++;
|
||||
imagIn = *pSrc++;
|
||||
/* store the result in the destination buffer. */
|
||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
while(numSamples > 0u)
|
||||
{
|
||||
/* out = sqrt((real * real) + (imag * imag)) */
|
||||
realIn = *pSrc++;
|
||||
imagIn = *pSrc++;
|
||||
/* store the result in the destination buffer. */
|
||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
numSamples--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_mag group
|
||||
*/
|
|
@ -1,145 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_mag_q15.c
|
||||
*
|
||||
* Description: Q15 complex magnitude.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_mag
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Q15 complex magnitude
|
||||
* @param *pSrc points to the complex input vector
|
||||
* @param *pDst points to the real output vector
|
||||
* @param numSamples number of complex samples in the input vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function implements 1.15 by 1.15 multiplications and finally output is converted into 2.14 format.
|
||||
*/
|
||||
|
||||
void arm_cmplx_mag_q15(
|
||||
q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
q31_t acc0, acc1; /* Accumulators */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q31_t in1, in2, in3, in4;
|
||||
q31_t acc2, acc3;
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
|
||||
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
|
||||
in1 = *__SIMD32(pSrc)++;
|
||||
in2 = *__SIMD32(pSrc)++;
|
||||
in3 = *__SIMD32(pSrc)++;
|
||||
in4 = *__SIMD32(pSrc)++;
|
||||
|
||||
acc0 = __SMUAD(in1, in1);
|
||||
acc1 = __SMUAD(in2, in2);
|
||||
acc2 = __SMUAD(in3, in3);
|
||||
acc3 = __SMUAD(in4, in4);
|
||||
|
||||
/* store the result in 2.14 format in the destination buffer. */
|
||||
arm_sqrt_q15((q15_t) ((acc0) >> 17), pDst++);
|
||||
arm_sqrt_q15((q15_t) ((acc1) >> 17), pDst++);
|
||||
arm_sqrt_q15((q15_t) ((acc2) >> 17), pDst++);
|
||||
arm_sqrt_q15((q15_t) ((acc3) >> 17), pDst++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
|
||||
in1 = *__SIMD32(pSrc)++;
|
||||
acc0 = __SMUAD(in1, in1);
|
||||
|
||||
/* store the result in 2.14 format in the destination buffer. */
|
||||
arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
q15_t real, imag; /* Temporary variables to hold input values */
|
||||
|
||||
while(numSamples > 0u)
|
||||
{
|
||||
/* out = sqrt(real * real + imag * imag) */
|
||||
real = *pSrc++;
|
||||
imag = *pSrc++;
|
||||
|
||||
acc0 = (real * real);
|
||||
acc1 = (imag * imag);
|
||||
|
||||
/* store the result in 2.14 format in the destination buffer. */
|
||||
arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
numSamples--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_mag group
|
||||
*/
|
|
@ -1,177 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_mag_q31.c
|
||||
*
|
||||
* Description: Q31 complex magnitude
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_mag
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q31 complex magnitude
|
||||
* @param *pSrc points to the complex input vector
|
||||
* @param *pDst points to the real output vector
|
||||
* @param numSamples number of complex samples in the input vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function implements 1.31 by 1.31 multiplications and finally output is converted into 2.30 format.
|
||||
* Input down scaling is not required.
|
||||
*/
|
||||
|
||||
void arm_cmplx_mag_q31(
|
||||
q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
q31_t real, imag; /* Temporary variables to hold input values */
|
||||
q31_t acc0, acc1; /* Accumulators */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t real1, real2, imag1, imag2; /* Temporary variables to hold input values */
|
||||
q31_t out1, out2, out3, out4; /* Accumulators */
|
||||
q63_t mul1, mul2, mul3, mul4; /* Temporary variables */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* read complex input from source buffer */
|
||||
real1 = pSrc[0];
|
||||
imag1 = pSrc[1];
|
||||
real2 = pSrc[2];
|
||||
imag2 = pSrc[3];
|
||||
|
||||
/* calculate power of input values */
|
||||
mul1 = (q63_t) real1 *real1;
|
||||
mul2 = (q63_t) imag1 *imag1;
|
||||
mul3 = (q63_t) real2 *real2;
|
||||
mul4 = (q63_t) imag2 *imag2;
|
||||
|
||||
/* get the result to 3.29 format */
|
||||
out1 = (q31_t) (mul1 >> 33);
|
||||
out2 = (q31_t) (mul2 >> 33);
|
||||
out3 = (q31_t) (mul3 >> 33);
|
||||
out4 = (q31_t) (mul4 >> 33);
|
||||
|
||||
/* add real and imaginary accumulators */
|
||||
out1 = out1 + out2;
|
||||
out3 = out3 + out4;
|
||||
|
||||
/* read complex input from source buffer */
|
||||
real1 = pSrc[4];
|
||||
imag1 = pSrc[5];
|
||||
real2 = pSrc[6];
|
||||
imag2 = pSrc[7];
|
||||
|
||||
/* calculate square root */
|
||||
arm_sqrt_q31(out1, &pDst[0]);
|
||||
|
||||
/* calculate power of input values */
|
||||
mul1 = (q63_t) real1 *real1;
|
||||
|
||||
/* calculate square root */
|
||||
arm_sqrt_q31(out3, &pDst[1]);
|
||||
|
||||
/* calculate power of input values */
|
||||
mul2 = (q63_t) imag1 *imag1;
|
||||
mul3 = (q63_t) real2 *real2;
|
||||
mul4 = (q63_t) imag2 *imag2;
|
||||
|
||||
/* get the result to 3.29 format */
|
||||
out1 = (q31_t) (mul1 >> 33);
|
||||
out2 = (q31_t) (mul2 >> 33);
|
||||
out3 = (q31_t) (mul3 >> 33);
|
||||
out4 = (q31_t) (mul4 >> 33);
|
||||
|
||||
/* add real and imaginary accumulators */
|
||||
out1 = out1 + out2;
|
||||
out3 = out3 + out4;
|
||||
|
||||
/* calculate square root */
|
||||
arm_sqrt_q31(out1, &pDst[2]);
|
||||
|
||||
/* increment destination by 8 to process next samples */
|
||||
pSrc += 8u;
|
||||
|
||||
/* calculate square root */
|
||||
arm_sqrt_q31(out3, &pDst[3]);
|
||||
|
||||
/* increment destination by 4 to process next samples */
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
blkCnt = numSamples;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
|
||||
real = *pSrc++;
|
||||
imag = *pSrc++;
|
||||
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
||||
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
||||
/* store the result in 2.30 format in the destination buffer. */
|
||||
arm_sqrt_q31(acc0 + acc1, pDst++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_mag group
|
||||
*/
|
|
@ -1,207 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_mag_squared_f32.c
|
||||
*
|
||||
* Description: Floating-point complex magnitude squared.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup cmplx_mag_squared Complex Magnitude Squared
|
||||
*
|
||||
* Computes the magnitude squared of the elements of a complex data vector.
|
||||
*
|
||||
* The <code>pSrc</code> points to the source data and
|
||||
* <code>pDst</code> points to the where the result should be written.
|
||||
* <code>numSamples</code> specifies the number of complex samples
|
||||
* in the input array and the data is stored in an interleaved fashion
|
||||
* (real, imag, real, imag, ...).
|
||||
* The input array has a total of <code>2*numSamples</code> values;
|
||||
* the output array has a total of <code>numSamples</code> values.
|
||||
*
|
||||
* The underlying algorithm is used:
|
||||
*
|
||||
* <pre>
|
||||
* for(n=0; n<numSamples; n++) {
|
||||
* pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_mag_squared
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Floating-point complex magnitude squared
|
||||
* @param[in] *pSrc points to the complex input vector
|
||||
* @param[out] *pDst points to the real output vector
|
||||
* @param[in] numSamples number of complex samples in the input vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_cmplx_mag_squared_f32(
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
float32_t real, imag; /* Temporary variables to store real and imaginary values */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
float32_t real1, real2, real3, real4; /* Temporary variables to hold real values */
|
||||
float32_t imag1, imag2, imag3, imag4; /* Temporary variables to hold imaginary values */
|
||||
float32_t mul1, mul2, mul3, mul4; /* Temporary variables */
|
||||
float32_t mul5, mul6, mul7, mul8; /* Temporary variables */
|
||||
float32_t out1, out2, out3, out4; /* Temporary variables to hold output values */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
||||
/* read real input sample from source buffer */
|
||||
real1 = pSrc[0];
|
||||
/* read imaginary input sample from source buffer */
|
||||
imag1 = pSrc[1];
|
||||
|
||||
/* calculate power of real value */
|
||||
mul1 = real1 * real1;
|
||||
|
||||
/* read real input sample from source buffer */
|
||||
real2 = pSrc[2];
|
||||
|
||||
/* calculate power of imaginary value */
|
||||
mul2 = imag1 * imag1;
|
||||
|
||||
/* read imaginary input sample from source buffer */
|
||||
imag2 = pSrc[3];
|
||||
|
||||
/* calculate power of real value */
|
||||
mul3 = real2 * real2;
|
||||
|
||||
/* read real input sample from source buffer */
|
||||
real3 = pSrc[4];
|
||||
|
||||
/* calculate power of imaginary value */
|
||||
mul4 = imag2 * imag2;
|
||||
|
||||
/* read imaginary input sample from source buffer */
|
||||
imag3 = pSrc[5];
|
||||
|
||||
/* calculate power of real value */
|
||||
mul5 = real3 * real3;
|
||||
/* calculate power of imaginary value */
|
||||
mul6 = imag3 * imag3;
|
||||
|
||||
/* read real input sample from source buffer */
|
||||
real4 = pSrc[6];
|
||||
|
||||
/* accumulate real and imaginary powers */
|
||||
out1 = mul1 + mul2;
|
||||
|
||||
/* read imaginary input sample from source buffer */
|
||||
imag4 = pSrc[7];
|
||||
|
||||
/* accumulate real and imaginary powers */
|
||||
out2 = mul3 + mul4;
|
||||
|
||||
/* calculate power of real value */
|
||||
mul7 = real4 * real4;
|
||||
/* calculate power of imaginary value */
|
||||
mul8 = imag4 * imag4;
|
||||
|
||||
/* store output to destination */
|
||||
pDst[0] = out1;
|
||||
|
||||
/* accumulate real and imaginary powers */
|
||||
out3 = mul5 + mul6;
|
||||
|
||||
/* store output to destination */
|
||||
pDst[1] = out2;
|
||||
|
||||
/* accumulate real and imaginary powers */
|
||||
out4 = mul7 + mul8;
|
||||
|
||||
/* store output to destination */
|
||||
pDst[2] = out3;
|
||||
|
||||
/* increment destination pointer by 8 to process next samples */
|
||||
pSrc += 8u;
|
||||
|
||||
/* store output to destination */
|
||||
pDst[3] = out4;
|
||||
|
||||
/* increment destination pointer by 4 to process next samples */
|
||||
pDst += 4u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
blkCnt = numSamples;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
||||
real = *pSrc++;
|
||||
imag = *pSrc++;
|
||||
|
||||
/* out = (real * real) + (imag * imag) */
|
||||
/* store the result in the destination buffer. */
|
||||
*pDst++ = (real * real) + (imag * imag);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_mag_squared group
|
||||
*/
|
|
@ -1,140 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_mag_squared_q15.c
|
||||
*
|
||||
* Description: Q15 complex magnitude squared.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_mag_squared
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q15 complex magnitude squared
|
||||
* @param *pSrc points to the complex input vector
|
||||
* @param *pDst points to the real output vector
|
||||
* @param numSamples number of complex samples in the input vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format.
|
||||
*/
|
||||
|
||||
void arm_cmplx_mag_squared_q15(
|
||||
q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
q31_t acc0, acc1; /* Accumulators */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q31_t in1, in2, in3, in4;
|
||||
q31_t acc2, acc3;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
||||
in1 = *__SIMD32(pSrc)++;
|
||||
in2 = *__SIMD32(pSrc)++;
|
||||
in3 = *__SIMD32(pSrc)++;
|
||||
in4 = *__SIMD32(pSrc)++;
|
||||
|
||||
acc0 = __SMUAD(in1, in1);
|
||||
acc1 = __SMUAD(in2, in2);
|
||||
acc2 = __SMUAD(in3, in3);
|
||||
acc3 = __SMUAD(in4, in4);
|
||||
|
||||
/* store the result in 3.13 format in the destination buffer. */
|
||||
*pDst++ = (q15_t) (acc0 >> 17);
|
||||
*pDst++ = (q15_t) (acc1 >> 17);
|
||||
*pDst++ = (q15_t) (acc2 >> 17);
|
||||
*pDst++ = (q15_t) (acc3 >> 17);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
||||
in1 = *__SIMD32(pSrc)++;
|
||||
acc0 = __SMUAD(in1, in1);
|
||||
|
||||
/* store the result in 3.13 format in the destination buffer. */
|
||||
*pDst++ = (q15_t) (acc0 >> 17);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
q15_t real, imag; /* Temporary variables to store real and imaginary values */
|
||||
|
||||
while(numSamples > 0u)
|
||||
{
|
||||
/* out = ((real * real) + (imag * imag)) */
|
||||
real = *pSrc++;
|
||||
imag = *pSrc++;
|
||||
acc0 = (real * real);
|
||||
acc1 = (imag * imag);
|
||||
/* store the result in 3.13 format in the destination buffer. */
|
||||
*pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
numSamples--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_mag_squared group
|
||||
*/
|
|
@ -1,153 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_mag_squared_q31.c
|
||||
*
|
||||
* Description: Q31 complex magnitude squared.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cmplx_mag_squared
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Q31 complex magnitude squared
|
||||
* @param *pSrc points to the complex input vector
|
||||
* @param *pDst points to the real output vector
|
||||
* @param numSamples number of complex samples in the input vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
|
||||
* Input down scaling is not required.
|
||||
*/
|
||||
|
||||
void arm_cmplx_mag_squared_q31(
|
||||
q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
q31_t real, imag; /* Temporary variables to store real and imaginary values */
|
||||
q31_t acc0, acc1; /* Accumulators */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
||||
real = *pSrc++;
|
||||
imag = *pSrc++;
|
||||
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
||||
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
||||
/* store the result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = acc0 + acc1;
|
||||
|
||||
real = *pSrc++;
|
||||
imag = *pSrc++;
|
||||
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
||||
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
||||
/* store the result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = acc0 + acc1;
|
||||
|
||||
real = *pSrc++;
|
||||
imag = *pSrc++;
|
||||
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
||||
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
||||
/* store the result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = acc0 + acc1;
|
||||
|
||||
real = *pSrc++;
|
||||
imag = *pSrc++;
|
||||
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
||||
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
||||
/* store the result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = acc0 + acc1;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
||||
real = *pSrc++;
|
||||
imag = *pSrc++;
|
||||
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
||||
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
||||
/* store the result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = acc0 + acc1;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
while(numSamples > 0u)
|
||||
{
|
||||
/* out = ((real * real) + (imag * imag)) */
|
||||
real = *pSrc++;
|
||||
imag = *pSrc++;
|
||||
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
||||
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
||||
/* store the result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = acc0 + acc1;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
numSamples--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cmplx_mag_squared group
|
||||
*/
|
|
@ -1,199 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_mult_cmplx_f32.c
|
||||
*
|
||||
* Description: Floating-point complex-by-complex multiplication
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup CmplxByCmplxMult Complex-by-Complex Multiplication
|
||||
*
|
||||
* Multiplies a complex vector by another complex vector and generates a complex result.
|
||||
* The data in the complex arrays is stored in an interleaved fashion
|
||||
* (real, imag, real, imag, ...).
|
||||
* The parameter <code>numSamples</code> represents the number of complex
|
||||
* samples processed. The complex arrays have a total of <code>2*numSamples</code>
|
||||
* real values.
|
||||
*
|
||||
* The underlying algorithm is used:
|
||||
*
|
||||
* <pre>
|
||||
* for(n=0; n<numSamples; n++) {
|
||||
* pDst[(2*n)+0] = pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
|
||||
* pDst[(2*n)+1] = pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup CmplxByCmplxMult
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Floating-point complex-by-complex multiplication
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] numSamples number of complex samples in each vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_cmplx_mult_cmplx_f32(
|
||||
float32_t * pSrcA,
|
||||
float32_t * pSrcB,
|
||||
float32_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
float32_t a1, b1, c1, d1; /* Temporary variables to store real and imaginary values */
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t a2, b2, c2, d2; /* Temporary variables to store real and imaginary values */
|
||||
float32_t acc1, acc2, acc3, acc4;
|
||||
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||
a1 = *pSrcA; /* A[2 * i] */
|
||||
c1 = *pSrcB; /* B[2 * i] */
|
||||
|
||||
b1 = *(pSrcA + 1); /* A[2 * i + 1] */
|
||||
acc1 = a1 * c1; /* acc1 = A[2 * i] * B[2 * i] */
|
||||
|
||||
a2 = *(pSrcA + 2); /* A[2 * i + 2] */
|
||||
acc2 = (b1 * c1); /* acc2 = A[2 * i + 1] * B[2 * i] */
|
||||
|
||||
d1 = *(pSrcB + 1); /* B[2 * i + 1] */
|
||||
c2 = *(pSrcB + 2); /* B[2 * i + 2] */
|
||||
acc1 -= b1 * d1; /* acc1 = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */
|
||||
|
||||
d2 = *(pSrcB + 3); /* B[2 * i + 3] */
|
||||
acc3 = a2 * c2; /* acc3 = A[2 * i + 2] * B[2 * i + 2] */
|
||||
|
||||
b2 = *(pSrcA + 3); /* A[2 * i + 3] */
|
||||
acc2 += (a1 * d1); /* acc2 = A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1] */
|
||||
|
||||
a1 = *(pSrcA + 4); /* A[2 * i + 4] */
|
||||
acc4 = (a2 * d2); /* acc4 = A[2 * i + 2] * B[2 * i + 3] */
|
||||
|
||||
c1 = *(pSrcB + 4); /* B[2 * i + 4] */
|
||||
acc3 -= (b2 * d2); /* acc3 = A[2 * i + 2] * B[2 * i + 2] - A[2 * i + 3] * B[2 * i + 3] */
|
||||
*pDst = acc1; /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */
|
||||
|
||||
b1 = *(pSrcA + 5); /* A[2 * i + 5] */
|
||||
acc4 += b2 * c2; /* acc4 = A[2 * i + 2] * B[2 * i + 3] + A[2 * i + 3] * B[2 * i + 2] */
|
||||
|
||||
*(pDst + 1) = acc2; /* C[2 * i + 1] = A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1] */
|
||||
acc1 = (a1 * c1);
|
||||
|
||||
d1 = *(pSrcB + 5);
|
||||
acc2 = (b1 * c1);
|
||||
|
||||
*(pDst + 2) = acc3;
|
||||
*(pDst + 3) = acc4;
|
||||
|
||||
a2 = *(pSrcA + 6);
|
||||
acc1 -= (b1 * d1);
|
||||
|
||||
c2 = *(pSrcB + 6);
|
||||
acc2 += (a1 * d1);
|
||||
|
||||
b2 = *(pSrcA + 7);
|
||||
acc3 = (a2 * c2);
|
||||
|
||||
d2 = *(pSrcB + 7);
|
||||
acc4 = (b2 * c2);
|
||||
|
||||
*(pDst + 4) = acc1;
|
||||
pSrcA += 8u;
|
||||
|
||||
acc3 -= (b2 * d2);
|
||||
acc4 += (a2 * d2);
|
||||
|
||||
*(pDst + 5) = acc2;
|
||||
pSrcB += 8u;
|
||||
|
||||
*(pDst + 6) = acc3;
|
||||
*(pDst + 7) = acc4;
|
||||
|
||||
pDst += 8u;
|
||||
|
||||
/* Decrement the numSamples loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
blkCnt = numSamples;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||
a1 = *pSrcA++;
|
||||
b1 = *pSrcA++;
|
||||
c1 = *pSrcB++;
|
||||
d1 = *pSrcB++;
|
||||
|
||||
/* store the result in the destination buffer. */
|
||||
*pDst++ = (a1 * c1) - (b1 * d1);
|
||||
*pDst++ = (a1 * d1) + (b1 * c1);
|
||||
|
||||
/* Decrement the numSamples loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of CmplxByCmplxMult group
|
||||
*/
|
|
@ -1,185 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_mult_cmplx_q15.c
|
||||
*
|
||||
* Description: Q15 complex-by-complex multiplication
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup CmplxByCmplxMult
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q15 complex-by-complex multiplication
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] numSamples number of complex samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format.
|
||||
*/
|
||||
|
||||
void arm_cmplx_mult_cmplx_q15(
|
||||
q15_t * pSrcA,
|
||||
q15_t * pSrcB,
|
||||
q15_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
q15_t a, b, c, d; /* Temporary variables to store real and imaginary values */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||
a = *pSrcA++;
|
||||
b = *pSrcA++;
|
||||
c = *pSrcB++;
|
||||
d = *pSrcB++;
|
||||
|
||||
/* store the result in 3.13 format in the destination buffer. */
|
||||
*pDst++ =
|
||||
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
||||
/* store the result in 3.13 format in the destination buffer. */
|
||||
*pDst++ =
|
||||
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
||||
|
||||
a = *pSrcA++;
|
||||
b = *pSrcA++;
|
||||
c = *pSrcB++;
|
||||
d = *pSrcB++;
|
||||
|
||||
/* store the result in 3.13 format in the destination buffer. */
|
||||
*pDst++ =
|
||||
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
||||
/* store the result in 3.13 format in the destination buffer. */
|
||||
*pDst++ =
|
||||
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
||||
|
||||
a = *pSrcA++;
|
||||
b = *pSrcA++;
|
||||
c = *pSrcB++;
|
||||
d = *pSrcB++;
|
||||
|
||||
/* store the result in 3.13 format in the destination buffer. */
|
||||
*pDst++ =
|
||||
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
||||
/* store the result in 3.13 format in the destination buffer. */
|
||||
*pDst++ =
|
||||
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
||||
|
||||
a = *pSrcA++;
|
||||
b = *pSrcA++;
|
||||
c = *pSrcB++;
|
||||
d = *pSrcB++;
|
||||
|
||||
/* store the result in 3.13 format in the destination buffer. */
|
||||
*pDst++ =
|
||||
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
||||
/* store the result in 3.13 format in the destination buffer. */
|
||||
*pDst++ =
|
||||
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||
a = *pSrcA++;
|
||||
b = *pSrcA++;
|
||||
c = *pSrcB++;
|
||||
d = *pSrcB++;
|
||||
|
||||
/* store the result in 3.13 format in the destination buffer. */
|
||||
*pDst++ =
|
||||
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
||||
/* store the result in 3.13 format in the destination buffer. */
|
||||
*pDst++ =
|
||||
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
while(numSamples > 0u)
|
||||
{
|
||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||
a = *pSrcA++;
|
||||
b = *pSrcA++;
|
||||
c = *pSrcB++;
|
||||
d = *pSrcB++;
|
||||
|
||||
/* store the result in 3.13 format in the destination buffer. */
|
||||
*pDst++ =
|
||||
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
||||
/* store the result in 3.13 format in the destination buffer. */
|
||||
*pDst++ =
|
||||
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
numSamples--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of CmplxByCmplxMult group
|
||||
*/
|
|
@ -1,318 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_mult_cmplx_q31.c
|
||||
*
|
||||
* Description: Q31 complex-by-complex multiplication
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup CmplxByCmplxMult
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Q31 complex-by-complex multiplication
|
||||
* @param[in] *pSrcA points to the first input vector
|
||||
* @param[in] *pSrcB points to the second input vector
|
||||
* @param[out] *pDst points to the output vector
|
||||
* @param[in] numSamples number of complex samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
|
||||
* Input down scaling is not required.
|
||||
*/
|
||||
|
||||
void arm_cmplx_mult_cmplx_q31(
|
||||
q31_t * pSrcA,
|
||||
q31_t * pSrcB,
|
||||
q31_t * pDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
q31_t a, b, c, d; /* Temporary variables to store real and imaginary values */
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q31_t mul1, mul2, mul3, mul4;
|
||||
q31_t out1, out2;
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||
a = *pSrcA++;
|
||||
b = *pSrcA++;
|
||||
c = *pSrcB++;
|
||||
d = *pSrcB++;
|
||||
|
||||
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
||||
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
||||
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
||||
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
||||
|
||||
mul1 = (mul1 >> 1);
|
||||
mul2 = (mul2 >> 1);
|
||||
mul3 = (mul3 >> 1);
|
||||
mul4 = (mul4 >> 1);
|
||||
|
||||
out1 = mul1 - mul2;
|
||||
out2 = mul3 + mul4;
|
||||
|
||||
/* store the real result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = out1;
|
||||
/* store the imag result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = out2;
|
||||
|
||||
a = *pSrcA++;
|
||||
b = *pSrcA++;
|
||||
c = *pSrcB++;
|
||||
d = *pSrcB++;
|
||||
|
||||
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
||||
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
||||
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
||||
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
||||
|
||||
mul1 = (mul1 >> 1);
|
||||
mul2 = (mul2 >> 1);
|
||||
mul3 = (mul3 >> 1);
|
||||
mul4 = (mul4 >> 1);
|
||||
|
||||
out1 = mul1 - mul2;
|
||||
out2 = mul3 + mul4;
|
||||
|
||||
/* store the real result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = out1;
|
||||
/* store the imag result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = out2;
|
||||
|
||||
a = *pSrcA++;
|
||||
b = *pSrcA++;
|
||||
c = *pSrcB++;
|
||||
d = *pSrcB++;
|
||||
|
||||
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
||||
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
||||
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
||||
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
||||
|
||||
mul1 = (mul1 >> 1);
|
||||
mul2 = (mul2 >> 1);
|
||||
mul3 = (mul3 >> 1);
|
||||
mul4 = (mul4 >> 1);
|
||||
|
||||
out1 = mul1 - mul2;
|
||||
out2 = mul3 + mul4;
|
||||
|
||||
/* store the real result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = out1;
|
||||
/* store the imag result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = out2;
|
||||
|
||||
a = *pSrcA++;
|
||||
b = *pSrcA++;
|
||||
c = *pSrcB++;
|
||||
d = *pSrcB++;
|
||||
|
||||
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
||||
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
||||
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
||||
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
||||
|
||||
mul1 = (mul1 >> 1);
|
||||
mul2 = (mul2 >> 1);
|
||||
mul3 = (mul3 >> 1);
|
||||
mul4 = (mul4 >> 1);
|
||||
|
||||
out1 = mul1 - mul2;
|
||||
out2 = mul3 + mul4;
|
||||
|
||||
/* store the real result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = out1;
|
||||
/* store the imag result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = out2;
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||
a = *pSrcA++;
|
||||
b = *pSrcA++;
|
||||
c = *pSrcB++;
|
||||
d = *pSrcB++;
|
||||
|
||||
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
||||
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
||||
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
||||
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
||||
|
||||
mul1 = (mul1 >> 1);
|
||||
mul2 = (mul2 >> 1);
|
||||
mul3 = (mul3 >> 1);
|
||||
mul4 = (mul4 >> 1);
|
||||
|
||||
out1 = mul1 - mul2;
|
||||
out2 = mul3 + mul4;
|
||||
|
||||
/* store the real result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = out1;
|
||||
/* store the imag result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = out2;
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = numSamples >> 1u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 2 outputs at a time.
|
||||
** a second loop below computes the remaining 1 sample. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||
a = *pSrcA++;
|
||||
b = *pSrcA++;
|
||||
c = *pSrcB++;
|
||||
d = *pSrcB++;
|
||||
|
||||
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
||||
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
||||
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
||||
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
||||
|
||||
mul1 = (mul1 >> 1);
|
||||
mul2 = (mul2 >> 1);
|
||||
mul3 = (mul3 >> 1);
|
||||
mul4 = (mul4 >> 1);
|
||||
|
||||
out1 = mul1 - mul2;
|
||||
out2 = mul3 + mul4;
|
||||
|
||||
/* store the real result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = out1;
|
||||
/* store the imag result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = out2;
|
||||
|
||||
a = *pSrcA++;
|
||||
b = *pSrcA++;
|
||||
c = *pSrcB++;
|
||||
d = *pSrcB++;
|
||||
|
||||
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
||||
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
||||
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
||||
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
||||
|
||||
mul1 = (mul1 >> 1);
|
||||
mul2 = (mul2 >> 1);
|
||||
mul3 = (mul3 >> 1);
|
||||
mul4 = (mul4 >> 1);
|
||||
|
||||
out1 = mul1 - mul2;
|
||||
out2 = mul3 + mul4;
|
||||
|
||||
/* store the real result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = out1;
|
||||
/* store the imag result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = out2;
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 2, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x2u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||
a = *pSrcA++;
|
||||
b = *pSrcA++;
|
||||
c = *pSrcB++;
|
||||
d = *pSrcB++;
|
||||
|
||||
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
||||
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
||||
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
||||
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
||||
|
||||
mul1 = (mul1 >> 1);
|
||||
mul2 = (mul2 >> 1);
|
||||
mul3 = (mul3 >> 1);
|
||||
mul4 = (mul4 >> 1);
|
||||
|
||||
out1 = mul1 - mul2;
|
||||
out2 = mul3 + mul4;
|
||||
|
||||
/* store the real result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = out1;
|
||||
/* store the imag result in 3.29 format in the destination buffer. */
|
||||
*pDst++ = out2;
|
||||
|
||||
/* Decrement the blockSize loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of CmplxByCmplxMult group
|
||||
*/
|
|
@ -1,217 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_mult_real_f32.c
|
||||
*
|
||||
* Description: Floating-point complex by real multiplication
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup CmplxByRealMult Complex-by-Real Multiplication
|
||||
*
|
||||
* Multiplies a complex vector by a real vector and generates a complex result.
|
||||
* The data in the complex arrays is stored in an interleaved fashion
|
||||
* (real, imag, real, imag, ...).
|
||||
* The parameter <code>numSamples</code> represents the number of complex
|
||||
* samples processed. The complex arrays have a total of <code>2*numSamples</code>
|
||||
* real values while the real array has a total of <code>numSamples</code>
|
||||
* real values.
|
||||
*
|
||||
* The underlying algorithm is used:
|
||||
*
|
||||
* <pre>
|
||||
* for(n=0; n<numSamples; n++) {
|
||||
* pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n];
|
||||
* pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n];
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup CmplxByRealMult
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Floating-point complex-by-real multiplication
|
||||
* @param[in] *pSrcCmplx points to the complex input vector
|
||||
* @param[in] *pSrcReal points to the real input vector
|
||||
* @param[out] *pCmplxDst points to the complex output vector
|
||||
* @param[in] numSamples number of samples in each vector
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_cmplx_mult_real_f32(
|
||||
float32_t * pSrcCmplx,
|
||||
float32_t * pSrcReal,
|
||||
float32_t * pCmplxDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
float32_t in; /* Temporary variable to store input value */
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t inA1, inA2, inA3, inA4; /* Temporary variables to hold input data */
|
||||
float32_t inA5, inA6, inA7, inA8; /* Temporary variables to hold input data */
|
||||
float32_t inB1, inB2, inB3, inB4; /* Temporary variables to hold input data */
|
||||
float32_t out1, out2, out3, out4; /* Temporary variables to hold output data */
|
||||
float32_t out5, out6, out7, out8; /* Temporary variables to hold output data */
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[2 * i] = A[2 * i] * B[i]. */
|
||||
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
||||
/* read input from complex input buffer */
|
||||
inA1 = pSrcCmplx[0];
|
||||
inA2 = pSrcCmplx[1];
|
||||
/* read input from real input buffer */
|
||||
inB1 = pSrcReal[0];
|
||||
|
||||
/* read input from complex input buffer */
|
||||
inA3 = pSrcCmplx[2];
|
||||
|
||||
/* multiply complex buffer real input with real buffer input */
|
||||
out1 = inA1 * inB1;
|
||||
|
||||
/* read input from complex input buffer */
|
||||
inA4 = pSrcCmplx[3];
|
||||
|
||||
/* multiply complex buffer imaginary input with real buffer input */
|
||||
out2 = inA2 * inB1;
|
||||
|
||||
/* read input from real input buffer */
|
||||
inB2 = pSrcReal[1];
|
||||
/* read input from complex input buffer */
|
||||
inA5 = pSrcCmplx[4];
|
||||
|
||||
/* multiply complex buffer real input with real buffer input */
|
||||
out3 = inA3 * inB2;
|
||||
|
||||
/* read input from complex input buffer */
|
||||
inA6 = pSrcCmplx[5];
|
||||
/* read input from real input buffer */
|
||||
inB3 = pSrcReal[2];
|
||||
|
||||
/* multiply complex buffer imaginary input with real buffer input */
|
||||
out4 = inA4 * inB2;
|
||||
|
||||
/* read input from complex input buffer */
|
||||
inA7 = pSrcCmplx[6];
|
||||
|
||||
/* multiply complex buffer real input with real buffer input */
|
||||
out5 = inA5 * inB3;
|
||||
|
||||
/* read input from complex input buffer */
|
||||
inA8 = pSrcCmplx[7];
|
||||
|
||||
/* multiply complex buffer imaginary input with real buffer input */
|
||||
out6 = inA6 * inB3;
|
||||
|
||||
/* read input from real input buffer */
|
||||
inB4 = pSrcReal[3];
|
||||
|
||||
/* store result to destination bufer */
|
||||
pCmplxDst[0] = out1;
|
||||
|
||||
/* multiply complex buffer real input with real buffer input */
|
||||
out7 = inA7 * inB4;
|
||||
|
||||
/* store result to destination bufer */
|
||||
pCmplxDst[1] = out2;
|
||||
|
||||
/* multiply complex buffer imaginary input with real buffer input */
|
||||
out8 = inA8 * inB4;
|
||||
|
||||
/* store result to destination bufer */
|
||||
pCmplxDst[2] = out3;
|
||||
pCmplxDst[3] = out4;
|
||||
pCmplxDst[4] = out5;
|
||||
|
||||
/* incremnet complex input buffer by 8 to process next samples */
|
||||
pSrcCmplx += 8u;
|
||||
|
||||
/* store result to destination bufer */
|
||||
pCmplxDst[5] = out6;
|
||||
|
||||
/* increment real input buffer by 4 to process next samples */
|
||||
pSrcReal += 4u;
|
||||
|
||||
/* store result to destination bufer */
|
||||
pCmplxDst[6] = out7;
|
||||
pCmplxDst[7] = out8;
|
||||
|
||||
/* increment destination buffer by 8 to process next sampels */
|
||||
pCmplxDst += 8u;
|
||||
|
||||
/* Decrement the numSamples loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
blkCnt = numSamples;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[2 * i] = A[2 * i] * B[i]. */
|
||||
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
||||
in = *pSrcReal++;
|
||||
/* store the result in the destination buffer. */
|
||||
*pCmplxDst++ = (*pSrcCmplx++) * (in);
|
||||
*pCmplxDst++ = (*pSrcCmplx++) * (in);
|
||||
|
||||
/* Decrement the numSamples loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of CmplxByRealMult group
|
||||
*/
|
|
@ -1,195 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_mult_real_q15.c
|
||||
*
|
||||
* Description: Q15 complex by real multiplication
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup CmplxByRealMult
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Q15 complex-by-real multiplication
|
||||
* @param[in] *pSrcCmplx points to the complex input vector
|
||||
* @param[in] *pSrcReal points to the real input vector
|
||||
* @param[out] *pCmplxDst points to the complex output vector
|
||||
* @param[in] numSamples number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_cmplx_mult_real_q15(
|
||||
q15_t * pSrcCmplx,
|
||||
q15_t * pSrcReal,
|
||||
q15_t * pCmplxDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
q15_t in; /* Temporary variable to store input value */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q31_t inA1, inA2; /* Temporary variables to hold input data */
|
||||
q31_t inB1; /* Temporary variables to hold input data */
|
||||
q15_t out1, out2, out3, out4; /* Temporary variables to hold output data */
|
||||
q31_t mul1, mul2, mul3, mul4; /* Temporary variables to hold intermediate data */
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[2 * i] = A[2 * i] * B[i]. */
|
||||
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
||||
/* read complex number both real and imaginary from complex input buffer */
|
||||
inA1 = *__SIMD32(pSrcCmplx)++;
|
||||
/* read two real values at a time from real input buffer */
|
||||
inB1 = *__SIMD32(pSrcReal)++;
|
||||
/* read complex number both real and imaginary from complex input buffer */
|
||||
inA2 = *__SIMD32(pSrcCmplx)++;
|
||||
|
||||
/* multiply complex number with real numbers */
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
mul1 = (q31_t) ((q15_t) (inA1) * (q15_t) (inB1));
|
||||
mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1));
|
||||
mul3 = (q31_t) ((q15_t) (inA2) * (q15_t) (inB1 >> 16));
|
||||
mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16));
|
||||
|
||||
#else
|
||||
|
||||
mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
|
||||
mul1 = (q31_t) ((q15_t) inA1 * (q15_t) (inB1 >> 16));
|
||||
mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1);
|
||||
mul3 = (q31_t) ((q15_t) inA2 * (q15_t) inB1);
|
||||
|
||||
#endif // #ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
/* saturate the result */
|
||||
out1 = (q15_t) __SSAT(mul1 >> 15u, 16);
|
||||
out2 = (q15_t) __SSAT(mul2 >> 15u, 16);
|
||||
out3 = (q15_t) __SSAT(mul3 >> 15u, 16);
|
||||
out4 = (q15_t) __SSAT(mul4 >> 15u, 16);
|
||||
|
||||
/* pack real and imaginary outputs and store them to destination */
|
||||
*__SIMD32(pCmplxDst)++ = __PKHBT(out1, out2, 16);
|
||||
*__SIMD32(pCmplxDst)++ = __PKHBT(out3, out4, 16);
|
||||
|
||||
inA1 = *__SIMD32(pSrcCmplx)++;
|
||||
inB1 = *__SIMD32(pSrcReal)++;
|
||||
inA2 = *__SIMD32(pSrcCmplx)++;
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
mul1 = (q31_t) ((q15_t) (inA1) * (q15_t) (inB1));
|
||||
mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1));
|
||||
mul3 = (q31_t) ((q15_t) (inA2) * (q15_t) (inB1 >> 16));
|
||||
mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16));
|
||||
|
||||
#else
|
||||
|
||||
mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
|
||||
mul1 = (q31_t) ((q15_t) inA1 * (q15_t) (inB1 >> 16));
|
||||
mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1);
|
||||
mul3 = (q31_t) ((q15_t) inA2 * (q15_t) inB1);
|
||||
|
||||
#endif // #ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
out1 = (q15_t) __SSAT(mul1 >> 15u, 16);
|
||||
out2 = (q15_t) __SSAT(mul2 >> 15u, 16);
|
||||
out3 = (q15_t) __SSAT(mul3 >> 15u, 16);
|
||||
out4 = (q15_t) __SSAT(mul4 >> 15u, 16);
|
||||
|
||||
*__SIMD32(pCmplxDst)++ = __PKHBT(out1, out2, 16);
|
||||
*__SIMD32(pCmplxDst)++ = __PKHBT(out3, out4, 16);
|
||||
|
||||
/* Decrement the numSamples loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[2 * i] = A[2 * i] * B[i]. */
|
||||
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
||||
in = *pSrcReal++;
|
||||
/* store the result in the destination buffer. */
|
||||
*pCmplxDst++ =
|
||||
(q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
|
||||
*pCmplxDst++ =
|
||||
(q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
|
||||
|
||||
/* Decrement the numSamples loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
while(numSamples > 0u)
|
||||
{
|
||||
/* realOut = realA * realB. */
|
||||
/* imagOut = imagA * realB. */
|
||||
in = *pSrcReal++;
|
||||
/* store the result in the destination buffer. */
|
||||
*pCmplxDst++ =
|
||||
(q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
|
||||
*pCmplxDst++ =
|
||||
(q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
|
||||
|
||||
/* Decrement the numSamples loop counter */
|
||||
numSamples--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of CmplxByRealMult group
|
||||
*/
|
|
@ -1,215 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cmplx_mult_real_q31.c
|
||||
*
|
||||
* Description: Q31 complex by real multiplication
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupCmplxMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup CmplxByRealMult
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Q31 complex-by-real multiplication
|
||||
* @param[in] *pSrcCmplx points to the complex input vector
|
||||
* @param[in] *pSrcReal points to the real input vector
|
||||
* @param[out] *pCmplxDst points to the complex output vector
|
||||
* @param[in] numSamples number of samples in each vector
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.
|
||||
*/
|
||||
|
||||
void arm_cmplx_mult_real_q31(
|
||||
q31_t * pSrcCmplx,
|
||||
q31_t * pSrcReal,
|
||||
q31_t * pCmplxDst,
|
||||
uint32_t numSamples)
|
||||
{
|
||||
q31_t inA1; /* Temporary variable to store input value */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q31_t inA2, inA3, inA4; /* Temporary variables to hold input data */
|
||||
q31_t inB1, inB2; /* Temporary variabels to hold input data */
|
||||
q31_t out1, out2, out3, out4; /* Temporary variables to hold output data */
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = numSamples >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[2 * i] = A[2 * i] * B[i]. */
|
||||
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
||||
/* read real input from complex input buffer */
|
||||
inA1 = *pSrcCmplx++;
|
||||
inA2 = *pSrcCmplx++;
|
||||
/* read input from real input bufer */
|
||||
inB1 = *pSrcReal++;
|
||||
inB2 = *pSrcReal++;
|
||||
/* read imaginary input from complex input buffer */
|
||||
inA3 = *pSrcCmplx++;
|
||||
inA4 = *pSrcCmplx++;
|
||||
|
||||
/* multiply complex input with real input */
|
||||
out1 = ((q63_t) inA1 * inB1) >> 32;
|
||||
out2 = ((q63_t) inA2 * inB1) >> 32;
|
||||
out3 = ((q63_t) inA3 * inB2) >> 32;
|
||||
out4 = ((q63_t) inA4 * inB2) >> 32;
|
||||
|
||||
/* sature the result */
|
||||
out1 = __SSAT(out1, 31);
|
||||
out2 = __SSAT(out2, 31);
|
||||
out3 = __SSAT(out3, 31);
|
||||
out4 = __SSAT(out4, 31);
|
||||
|
||||
/* get result in 1.31 format */
|
||||
out1 = out1 << 1;
|
||||
out2 = out2 << 1;
|
||||
out3 = out3 << 1;
|
||||
out4 = out4 << 1;
|
||||
|
||||
/* store the result to destination buffer */
|
||||
*pCmplxDst++ = out1;
|
||||
*pCmplxDst++ = out2;
|
||||
*pCmplxDst++ = out3;
|
||||
*pCmplxDst++ = out4;
|
||||
|
||||
/* read real input from complex input buffer */
|
||||
inA1 = *pSrcCmplx++;
|
||||
inA2 = *pSrcCmplx++;
|
||||
/* read input from real input bufer */
|
||||
inB1 = *pSrcReal++;
|
||||
inB2 = *pSrcReal++;
|
||||
/* read imaginary input from complex input buffer */
|
||||
inA3 = *pSrcCmplx++;
|
||||
inA4 = *pSrcCmplx++;
|
||||
|
||||
/* multiply complex input with real input */
|
||||
out1 = ((q63_t) inA1 * inB1) >> 32;
|
||||
out2 = ((q63_t) inA2 * inB1) >> 32;
|
||||
out3 = ((q63_t) inA3 * inB2) >> 32;
|
||||
out4 = ((q63_t) inA4 * inB2) >> 32;
|
||||
|
||||
/* sature the result */
|
||||
out1 = __SSAT(out1, 31);
|
||||
out2 = __SSAT(out2, 31);
|
||||
out3 = __SSAT(out3, 31);
|
||||
out4 = __SSAT(out4, 31);
|
||||
|
||||
/* get result in 1.31 format */
|
||||
out1 = out1 << 1;
|
||||
out2 = out2 << 1;
|
||||
out3 = out3 << 1;
|
||||
out4 = out4 << 1;
|
||||
|
||||
/* store the result to destination buffer */
|
||||
*pCmplxDst++ = out1;
|
||||
*pCmplxDst++ = out2;
|
||||
*pCmplxDst++ = out3;
|
||||
*pCmplxDst++ = out4;
|
||||
|
||||
/* Decrement the numSamples loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = numSamples % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C[2 * i] = A[2 * i] * B[i]. */
|
||||
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
||||
/* read real input from complex input buffer */
|
||||
inA1 = *pSrcCmplx++;
|
||||
inA2 = *pSrcCmplx++;
|
||||
/* read input from real input bufer */
|
||||
inB1 = *pSrcReal++;
|
||||
|
||||
/* multiply complex input with real input */
|
||||
out1 = ((q63_t) inA1 * inB1) >> 32;
|
||||
out2 = ((q63_t) inA2 * inB1) >> 32;
|
||||
|
||||
/* sature the result */
|
||||
out1 = __SSAT(out1, 31);
|
||||
out2 = __SSAT(out2, 31);
|
||||
|
||||
/* get result in 1.31 format */
|
||||
out1 = out1 << 1;
|
||||
out2 = out2 << 1;
|
||||
|
||||
/* store the result to destination buffer */
|
||||
*pCmplxDst++ = out1;
|
||||
*pCmplxDst++ = out2;
|
||||
|
||||
/* Decrement the numSamples loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
while(numSamples > 0u)
|
||||
{
|
||||
/* realOut = realA * realB. */
|
||||
/* imagReal = imagA * realB. */
|
||||
inA1 = *pSrcReal++;
|
||||
/* store the result in the destination buffer. */
|
||||
*pCmplxDst++ =
|
||||
(q31_t) clip_q63_to_q31(((q63_t) * pSrcCmplx++ * inA1) >> 31);
|
||||
*pCmplxDst++ =
|
||||
(q31_t) clip_q63_to_q31(((q63_t) * pSrcCmplx++ * inA1) >> 31);
|
||||
|
||||
/* Decrement the numSamples loop counter */
|
||||
numSamples--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of CmplxByRealMult group
|
||||
*/
|
|
@ -1,79 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_pid_init_f32.c
|
||||
*
|
||||
* Description: Floating-point PID Control initialization function
|
||||
*
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @addtogroup PID
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Initialization function for the floating-point PID Control.
|
||||
* @param[in,out] *S points to an instance of the PID structure.
|
||||
* @param[in] resetStateFlag flag to reset the state. 0 = no change in state & 1 = reset the state.
|
||||
* @return none.
|
||||
* \par Description:
|
||||
* \par
|
||||
* The <code>resetStateFlag</code> specifies whether to set state to zero or not. \n
|
||||
* The function computes the structure fields: <code>A0</code>, <code>A1</code> <code>A2</code>
|
||||
* using the proportional gain( \c Kp), integral gain( \c Ki) and derivative gain( \c Kd)
|
||||
* also sets the state variables to all zeros.
|
||||
*/
|
||||
|
||||
void arm_pid_init_f32(
|
||||
arm_pid_instance_f32 * S,
|
||||
int32_t resetStateFlag)
|
||||
{
|
||||
|
||||
/* Derived coefficient A0 */
|
||||
S->A0 = S->Kp + S->Ki + S->Kd;
|
||||
|
||||
/* Derived coefficient A1 */
|
||||
S->A1 = (-S->Kp) - ((float32_t) 2.0 * S->Kd);
|
||||
|
||||
/* Derived coefficient A2 */
|
||||
S->A2 = S->Kd;
|
||||
|
||||
/* Check whether state needs reset or not */
|
||||
if(resetStateFlag)
|
||||
{
|
||||
/* Clear the state buffer. The size will be always 3 samples */
|
||||
memset(S->state, 0, 3u * sizeof(float32_t));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of PID group
|
||||
*/
|
|
@ -1,114 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_pid_init_q15.c
|
||||
*
|
||||
* Description: Q15 PID Control initialization function
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @addtogroup PID
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @details
|
||||
* @param[in,out] *S points to an instance of the Q15 PID structure.
|
||||
* @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state.
|
||||
* @return none.
|
||||
* \par Description:
|
||||
* \par
|
||||
* The <code>resetStateFlag</code> specifies whether to set state to zero or not. \n
|
||||
* The function computes the structure fields: <code>A0</code>, <code>A1</code> <code>A2</code>
|
||||
* using the proportional gain( \c Kp), integral gain( \c Ki) and derivative gain( \c Kd)
|
||||
* also sets the state variables to all zeros.
|
||||
*/
|
||||
|
||||
void arm_pid_init_q15(
|
||||
arm_pid_instance_q15 * S,
|
||||
int32_t resetStateFlag)
|
||||
{
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/* Derived coefficient A0 */
|
||||
S->A0 = __QADD16(__QADD16(S->Kp, S->Ki), S->Kd);
|
||||
|
||||
/* Derived coefficients and pack into A1 */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
S->A1 = __PKHBT(-__QADD16(__QADD16(S->Kd, S->Kd), S->Kp), S->Kd, 16);
|
||||
|
||||
#else
|
||||
|
||||
S->A1 = __PKHBT(S->Kd, -__QADD16(__QADD16(S->Kd, S->Kd), S->Kp), 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* Check whether state needs reset or not */
|
||||
if(resetStateFlag)
|
||||
{
|
||||
/* Clear the state buffer. The size will be always 3 samples */
|
||||
memset(S->state, 0, 3u * sizeof(q15_t));
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
q31_t temp; /*to store the sum */
|
||||
|
||||
/* Derived coefficient A0 */
|
||||
temp = S->Kp + S->Ki + S->Kd;
|
||||
S->A0 = (q15_t) __SSAT(temp, 16);
|
||||
|
||||
/* Derived coefficients and pack into A1 */
|
||||
temp = -(S->Kd + S->Kd + S->Kp);
|
||||
S->A1 = (q15_t) __SSAT(temp, 16);
|
||||
S->A2 = S->Kd;
|
||||
|
||||
|
||||
|
||||
/* Check whether state needs reset or not */
|
||||
if(resetStateFlag)
|
||||
{
|
||||
/* Clear the state buffer. The size will be always 3 samples */
|
||||
memset(S->state, 0, 3u * sizeof(q15_t));
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of PID group
|
||||
*/
|
|
@ -1,99 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_pid_init_q31.c
|
||||
*
|
||||
* Description: Q31 PID Control initialization function
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @addtogroup PID
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Initialization function for the Q31 PID Control.
|
||||
* @param[in,out] *S points to an instance of the Q31 PID structure.
|
||||
* @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state.
|
||||
* @return none.
|
||||
* \par Description:
|
||||
* \par
|
||||
* The <code>resetStateFlag</code> specifies whether to set state to zero or not. \n
|
||||
* The function computes the structure fields: <code>A0</code>, <code>A1</code> <code>A2</code>
|
||||
* using the proportional gain( \c Kp), integral gain( \c Ki) and derivative gain( \c Kd)
|
||||
* also sets the state variables to all zeros.
|
||||
*/
|
||||
|
||||
void arm_pid_init_q31(
|
||||
arm_pid_instance_q31 * S,
|
||||
int32_t resetStateFlag)
|
||||
{
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/* Derived coefficient A0 */
|
||||
S->A0 = __QADD(__QADD(S->Kp, S->Ki), S->Kd);
|
||||
|
||||
/* Derived coefficient A1 */
|
||||
S->A1 = -__QADD(__QADD(S->Kd, S->Kd), S->Kp);
|
||||
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
q31_t temp;
|
||||
|
||||
/* Derived coefficient A0 */
|
||||
temp = clip_q63_to_q31((q63_t) S->Kp + S->Ki);
|
||||
S->A0 = clip_q63_to_q31((q63_t) temp + S->Kd);
|
||||
|
||||
/* Derived coefficient A1 */
|
||||
temp = clip_q63_to_q31((q63_t) S->Kd + S->Kd);
|
||||
S->A1 = -clip_q63_to_q31((q63_t) temp + S->Kp);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
/* Derived coefficient A2 */
|
||||
S->A2 = S->Kd;
|
||||
|
||||
/* Check whether state needs reset or not */
|
||||
if(resetStateFlag)
|
||||
{
|
||||
/* Clear the state buffer. The size will be always 3 samples */
|
||||
memset(S->state, 0, 3u * sizeof(q31_t));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of PID group
|
||||
*/
|
|
@ -1,57 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_pid_reset_f32.c
|
||||
*
|
||||
* Description: Floating-point PID Control reset function
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @addtogroup PID
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Reset function for the floating-point PID Control.
|
||||
* @param[in] *S Instance pointer of PID control data structure.
|
||||
* @return none.
|
||||
* \par Description:
|
||||
* The function resets the state buffer to zeros.
|
||||
*/
|
||||
void arm_pid_reset_f32(
|
||||
arm_pid_instance_f32 * S)
|
||||
{
|
||||
|
||||
/* Clear the state buffer. The size will be always 3 samples */
|
||||
memset(S->state, 0, 3u * sizeof(float32_t));
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of PID group
|
||||
*/
|
|
@ -1,56 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_pid_reset_q15.c
|
||||
*
|
||||
* Description: Q15 PID Control reset function
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @addtogroup PID
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Reset function for the Q15 PID Control.
|
||||
* @param[in] *S Instance pointer of PID control data structure.
|
||||
* @return none.
|
||||
* \par Description:
|
||||
* The function resets the state buffer to zeros.
|
||||
*/
|
||||
void arm_pid_reset_q15(
|
||||
arm_pid_instance_q15 * S)
|
||||
{
|
||||
/* Reset state to zero, The size will be always 3 samples */
|
||||
memset(S->state, 0, 3u * sizeof(q15_t));
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of PID group
|
||||
*/
|
|
@ -1,57 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_pid_reset_q31.c
|
||||
*
|
||||
* Description: Q31 PID Control reset function
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @addtogroup PID
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Reset function for the Q31 PID Control.
|
||||
* @param[in] *S Instance pointer of PID control data structure.
|
||||
* @return none.
|
||||
* \par Description:
|
||||
* The function resets the state buffer to zeros.
|
||||
*/
|
||||
void arm_pid_reset_q31(
|
||||
arm_pid_instance_q31 * S)
|
||||
{
|
||||
|
||||
/* Clear the state buffer. The size will be always 3 samples */
|
||||
memset(S->state, 0, 3u * sizeof(q31_t));
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of PID group
|
||||
*/
|
|
@ -1,428 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sin_cos_f32.c
|
||||
*
|
||||
* Description: Sine and Cosine calculation for floating-point values.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupController
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup SinCos Sine Cosine
|
||||
*
|
||||
* Computes the trigonometric sine and cosine values using a combination of table lookup
|
||||
* and linear interpolation.
|
||||
* There are separate functions for Q31 and floating-point data types.
|
||||
* The input to the floating-point version is in degrees while the
|
||||
* fixed-point Q31 have a scaled input with the range
|
||||
* [-1 0.9999] mapping to [-180 179] degrees.
|
||||
*
|
||||
* The implementation is based on table lookup using 360 values together with linear interpolation.
|
||||
* The steps used are:
|
||||
* -# Calculation of the nearest integer table index.
|
||||
* -# Compute the fractional portion (fract) of the input.
|
||||
* -# Fetch the value corresponding to \c index from sine table to \c y0 and also value from \c index+1 to \c y1.
|
||||
* -# Sine value is computed as <code> *psinVal = y0 + (fract * (y1 - y0))</code>.
|
||||
* -# Fetch the value corresponding to \c index from cosine table to \c y0 and also value from \c index+1 to \c y1.
|
||||
* -# Cosine value is computed as <code> *pcosVal = y0 + (fract * (y1 - y0))</code>.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup SinCos
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* \par
|
||||
* Cosine Table is generated from following loop
|
||||
* <pre>for(i = 0; i < 360; i++)
|
||||
* {
|
||||
* cosTable[i]= cos((i-180) * PI/180.0);
|
||||
* } </pre>
|
||||
*/
|
||||
|
||||
static const float32_t cosTable[360] = {
|
||||
-0.999847695156391270f, -0.999390827019095760f, -0.998629534754573830f,
|
||||
-0.997564050259824200f, -0.996194698091745550f, -0.994521895368273290f,
|
||||
-0.992546151641321980f, -0.990268068741570250f,
|
||||
-0.987688340595137660f, -0.984807753012208020f, -0.981627183447663980f,
|
||||
-0.978147600733805690f, -0.974370064785235250f, -0.970295726275996470f,
|
||||
-0.965925826289068200f, -0.961261695938318670f,
|
||||
-0.956304755963035440f, -0.951056516295153530f, -0.945518575599316740f,
|
||||
-0.939692620785908320f, -0.933580426497201740f, -0.927183854566787310f,
|
||||
-0.920504853452440150f, -0.913545457642600760f,
|
||||
-0.906307787036649940f, -0.898794046299167040f, -0.891006524188367790f,
|
||||
-0.882947592858926770f, -0.874619707139395740f, -0.866025403784438710f,
|
||||
-0.857167300702112220f, -0.848048096156425960f,
|
||||
-0.838670567945424160f, -0.829037572555041620f, -0.819152044288991580f,
|
||||
-0.809016994374947340f, -0.798635510047292940f, -0.788010753606721900f,
|
||||
-0.777145961456970680f, -0.766044443118977900f,
|
||||
-0.754709580222772010f, -0.743144825477394130f, -0.731353701619170460f,
|
||||
-0.719339800338651300f, -0.707106781186547460f, -0.694658370458997030f,
|
||||
-0.681998360062498370f, -0.669130606358858240f,
|
||||
-0.656059028990507500f, -0.642787609686539360f, -0.629320391049837280f,
|
||||
-0.615661475325658290f, -0.601815023152048380f, -0.587785252292473030f,
|
||||
-0.573576436351045830f, -0.559192903470746680f,
|
||||
-0.544639035015027080f, -0.529919264233204790f, -0.515038074910054270f,
|
||||
-0.499999999999999780f, -0.484809620246337000f, -0.469471562785890530f,
|
||||
-0.453990499739546750f, -0.438371146789077510f,
|
||||
-0.422618261740699330f, -0.406736643075800100f, -0.390731128489273600f,
|
||||
-0.374606593415912070f, -0.358367949545300270f, -0.342020143325668710f,
|
||||
-0.325568154457156420f, -0.309016994374947340f,
|
||||
-0.292371704722736660f, -0.275637355816999050f, -0.258819045102520850f,
|
||||
-0.241921895599667790f, -0.224951054343864810f, -0.207911690817759120f,
|
||||
-0.190808995376544800f, -0.173648177666930300f,
|
||||
-0.156434465040231040f, -0.139173100960065350f, -0.121869343405147370f,
|
||||
-0.104528463267653330f, -0.087155742747658235f, -0.069756473744125330f,
|
||||
-0.052335956242943620f, -0.034899496702500733f,
|
||||
-0.017452406437283477f, 0.000000000000000061f, 0.017452406437283376f,
|
||||
0.034899496702501080f, 0.052335956242943966f, 0.069756473744125455f,
|
||||
0.087155742747658138f, 0.104528463267653460f,
|
||||
0.121869343405147490f, 0.139173100960065690f, 0.156434465040230920f,
|
||||
0.173648177666930410f, 0.190808995376544920f, 0.207911690817759450f,
|
||||
0.224951054343864920f, 0.241921895599667900f,
|
||||
0.258819045102520740f, 0.275637355816999160f, 0.292371704722736770f,
|
||||
0.309016994374947450f, 0.325568154457156760f, 0.342020143325668820f,
|
||||
0.358367949545300380f, 0.374606593415911960f,
|
||||
0.390731128489273940f, 0.406736643075800210f, 0.422618261740699440f,
|
||||
0.438371146789077460f, 0.453990499739546860f, 0.469471562785890860f,
|
||||
0.484809620246337110f, 0.500000000000000110f,
|
||||
0.515038074910054380f, 0.529919264233204900f, 0.544639035015027200f,
|
||||
0.559192903470746790f, 0.573576436351046050f, 0.587785252292473140f,
|
||||
0.601815023152048270f, 0.615661475325658290f,
|
||||
0.629320391049837500f, 0.642787609686539360f, 0.656059028990507280f,
|
||||
0.669130606358858240f, 0.681998360062498480f, 0.694658370458997370f,
|
||||
0.707106781186547570f, 0.719339800338651190f,
|
||||
0.731353701619170570f, 0.743144825477394240f, 0.754709580222772010f,
|
||||
0.766044443118978010f, 0.777145961456970900f, 0.788010753606722010f,
|
||||
0.798635510047292830f, 0.809016994374947450f,
|
||||
0.819152044288991800f, 0.829037572555041620f, 0.838670567945424050f,
|
||||
0.848048096156425960f, 0.857167300702112330f, 0.866025403784438710f,
|
||||
0.874619707139395740f, 0.882947592858926990f,
|
||||
0.891006524188367900f, 0.898794046299167040f, 0.906307787036649940f,
|
||||
0.913545457642600870f, 0.920504853452440370f, 0.927183854566787420f,
|
||||
0.933580426497201740f, 0.939692620785908430f,
|
||||
0.945518575599316850f, 0.951056516295153530f, 0.956304755963035440f,
|
||||
0.961261695938318890f, 0.965925826289068310f, 0.970295726275996470f,
|
||||
0.974370064785235250f, 0.978147600733805690f,
|
||||
0.981627183447663980f, 0.984807753012208020f, 0.987688340595137770f,
|
||||
0.990268068741570360f, 0.992546151641321980f, 0.994521895368273290f,
|
||||
0.996194698091745550f, 0.997564050259824200f,
|
||||
0.998629534754573830f, 0.999390827019095760f, 0.999847695156391270f,
|
||||
1.000000000000000000f, 0.999847695156391270f, 0.999390827019095760f,
|
||||
0.998629534754573830f, 0.997564050259824200f,
|
||||
0.996194698091745550f, 0.994521895368273290f, 0.992546151641321980f,
|
||||
0.990268068741570360f, 0.987688340595137770f, 0.984807753012208020f,
|
||||
0.981627183447663980f, 0.978147600733805690f,
|
||||
0.974370064785235250f, 0.970295726275996470f, 0.965925826289068310f,
|
||||
0.961261695938318890f, 0.956304755963035440f, 0.951056516295153530f,
|
||||
0.945518575599316850f, 0.939692620785908430f,
|
||||
0.933580426497201740f, 0.927183854566787420f, 0.920504853452440370f,
|
||||
0.913545457642600870f, 0.906307787036649940f, 0.898794046299167040f,
|
||||
0.891006524188367900f, 0.882947592858926990f,
|
||||
0.874619707139395740f, 0.866025403784438710f, 0.857167300702112330f,
|
||||
0.848048096156425960f, 0.838670567945424050f, 0.829037572555041620f,
|
||||
0.819152044288991800f, 0.809016994374947450f,
|
||||
0.798635510047292830f, 0.788010753606722010f, 0.777145961456970900f,
|
||||
0.766044443118978010f, 0.754709580222772010f, 0.743144825477394240f,
|
||||
0.731353701619170570f, 0.719339800338651190f,
|
||||
0.707106781186547570f, 0.694658370458997370f, 0.681998360062498480f,
|
||||
0.669130606358858240f, 0.656059028990507280f, 0.642787609686539360f,
|
||||
0.629320391049837500f, 0.615661475325658290f,
|
||||
0.601815023152048270f, 0.587785252292473140f, 0.573576436351046050f,
|
||||
0.559192903470746790f, 0.544639035015027200f, 0.529919264233204900f,
|
||||
0.515038074910054380f, 0.500000000000000110f,
|
||||
0.484809620246337110f, 0.469471562785890860f, 0.453990499739546860f,
|
||||
0.438371146789077460f, 0.422618261740699440f, 0.406736643075800210f,
|
||||
0.390731128489273940f, 0.374606593415911960f,
|
||||
0.358367949545300380f, 0.342020143325668820f, 0.325568154457156760f,
|
||||
0.309016994374947450f, 0.292371704722736770f, 0.275637355816999160f,
|
||||
0.258819045102520740f, 0.241921895599667900f,
|
||||
0.224951054343864920f, 0.207911690817759450f, 0.190808995376544920f,
|
||||
0.173648177666930410f, 0.156434465040230920f, 0.139173100960065690f,
|
||||
0.121869343405147490f, 0.104528463267653460f,
|
||||
0.087155742747658138f, 0.069756473744125455f, 0.052335956242943966f,
|
||||
0.034899496702501080f, 0.017452406437283376f, 0.000000000000000061f,
|
||||
-0.017452406437283477f, -0.034899496702500733f,
|
||||
-0.052335956242943620f, -0.069756473744125330f, -0.087155742747658235f,
|
||||
-0.104528463267653330f, -0.121869343405147370f, -0.139173100960065350f,
|
||||
-0.156434465040231040f, -0.173648177666930300f,
|
||||
-0.190808995376544800f, -0.207911690817759120f, -0.224951054343864810f,
|
||||
-0.241921895599667790f, -0.258819045102520850f, -0.275637355816999050f,
|
||||
-0.292371704722736660f, -0.309016994374947340f,
|
||||
-0.325568154457156420f, -0.342020143325668710f, -0.358367949545300270f,
|
||||
-0.374606593415912070f, -0.390731128489273600f, -0.406736643075800100f,
|
||||
-0.422618261740699330f, -0.438371146789077510f,
|
||||
-0.453990499739546750f, -0.469471562785890530f, -0.484809620246337000f,
|
||||
-0.499999999999999780f, -0.515038074910054270f, -0.529919264233204790f,
|
||||
-0.544639035015027080f, -0.559192903470746680f,
|
||||
-0.573576436351045830f, -0.587785252292473030f, -0.601815023152048380f,
|
||||
-0.615661475325658290f, -0.629320391049837280f, -0.642787609686539360f,
|
||||
-0.656059028990507500f, -0.669130606358858240f,
|
||||
-0.681998360062498370f, -0.694658370458997030f, -0.707106781186547460f,
|
||||
-0.719339800338651300f, -0.731353701619170460f, -0.743144825477394130f,
|
||||
-0.754709580222772010f, -0.766044443118977900f,
|
||||
-0.777145961456970680f, -0.788010753606721900f, -0.798635510047292940f,
|
||||
-0.809016994374947340f, -0.819152044288991580f, -0.829037572555041620f,
|
||||
-0.838670567945424160f, -0.848048096156425960f,
|
||||
-0.857167300702112220f, -0.866025403784438710f, -0.874619707139395740f,
|
||||
-0.882947592858926770f, -0.891006524188367790f, -0.898794046299167040f,
|
||||
-0.906307787036649940f, -0.913545457642600760f,
|
||||
-0.920504853452440150f, -0.927183854566787310f, -0.933580426497201740f,
|
||||
-0.939692620785908320f, -0.945518575599316740f, -0.951056516295153530f,
|
||||
-0.956304755963035440f, -0.961261695938318670f,
|
||||
-0.965925826289068200f, -0.970295726275996470f, -0.974370064785235250f,
|
||||
-0.978147600733805690f, -0.981627183447663980f, -0.984807753012208020f,
|
||||
-0.987688340595137660f, -0.990268068741570250f,
|
||||
-0.992546151641321980f, -0.994521895368273290f, -0.996194698091745550f,
|
||||
-0.997564050259824200f, -0.998629534754573830f, -0.999390827019095760f,
|
||||
-0.999847695156391270f, -1.000000000000000000f
|
||||
};
|
||||
|
||||
/**
|
||||
* \par
|
||||
* Sine Table is generated from following loop
|
||||
* <pre>for(i = 0; i < 360; i++)
|
||||
* {
|
||||
* sinTable[i]= sin((i-180) * PI/180.0);
|
||||
* } </pre>
|
||||
*/
|
||||
|
||||
|
||||
static const float32_t sinTable[360] = {
|
||||
-0.017452406437283439f, -0.034899496702500699f, -0.052335956242943807f,
|
||||
-0.069756473744125524f, -0.087155742747658638f, -0.104528463267653730f,
|
||||
-0.121869343405147550f, -0.139173100960065740f,
|
||||
-0.156434465040230980f, -0.173648177666930280f, -0.190808995376544970f,
|
||||
-0.207911690817759310f, -0.224951054343864780f, -0.241921895599667730f,
|
||||
-0.258819045102521020f, -0.275637355816999660f,
|
||||
-0.292371704722737050f, -0.309016994374947510f, -0.325568154457156980f,
|
||||
-0.342020143325668880f, -0.358367949545300210f, -0.374606593415912240f,
|
||||
-0.390731128489274160f, -0.406736643075800430f,
|
||||
-0.422618261740699500f, -0.438371146789077290f, -0.453990499739546860f,
|
||||
-0.469471562785891080f, -0.484809620246337170f, -0.499999999999999940f,
|
||||
-0.515038074910054380f, -0.529919264233204900f,
|
||||
-0.544639035015026860f, -0.559192903470746900f, -0.573576436351046380f,
|
||||
-0.587785252292473250f, -0.601815023152048160f, -0.615661475325658400f,
|
||||
-0.629320391049837720f, -0.642787609686539470f,
|
||||
-0.656059028990507280f, -0.669130606358858350f, -0.681998360062498590f,
|
||||
-0.694658370458997140f, -0.707106781186547570f, -0.719339800338651410f,
|
||||
-0.731353701619170570f, -0.743144825477394240f,
|
||||
-0.754709580222771790f, -0.766044443118978010f, -0.777145961456971010f,
|
||||
-0.788010753606722010f, -0.798635510047292720f, -0.809016994374947450f,
|
||||
-0.819152044288992020f, -0.829037572555041740f,
|
||||
-0.838670567945424050f, -0.848048096156426070f, -0.857167300702112330f,
|
||||
-0.866025403784438710f, -0.874619707139395850f, -0.882947592858927100f,
|
||||
-0.891006524188367900f, -0.898794046299166930f,
|
||||
-0.906307787036650050f, -0.913545457642600980f, -0.920504853452440370f,
|
||||
-0.927183854566787420f, -0.933580426497201740f, -0.939692620785908430f,
|
||||
-0.945518575599316850f, -0.951056516295153640f,
|
||||
-0.956304755963035550f, -0.961261695938318890f, -0.965925826289068310f,
|
||||
-0.970295726275996470f, -0.974370064785235250f, -0.978147600733805690f,
|
||||
-0.981627183447663980f, -0.984807753012208020f,
|
||||
-0.987688340595137660f, -0.990268068741570360f, -0.992546151641322090f,
|
||||
-0.994521895368273400f, -0.996194698091745550f, -0.997564050259824200f,
|
||||
-0.998629534754573830f, -0.999390827019095760f,
|
||||
-0.999847695156391270f, -1.000000000000000000f, -0.999847695156391270f,
|
||||
-0.999390827019095760f, -0.998629534754573830f, -0.997564050259824200f,
|
||||
-0.996194698091745550f, -0.994521895368273290f,
|
||||
-0.992546151641321980f, -0.990268068741570250f, -0.987688340595137770f,
|
||||
-0.984807753012208020f, -0.981627183447663980f, -0.978147600733805580f,
|
||||
-0.974370064785235250f, -0.970295726275996470f,
|
||||
-0.965925826289068310f, -0.961261695938318890f, -0.956304755963035440f,
|
||||
-0.951056516295153530f, -0.945518575599316740f, -0.939692620785908320f,
|
||||
-0.933580426497201740f, -0.927183854566787420f,
|
||||
-0.920504853452440260f, -0.913545457642600870f, -0.906307787036649940f,
|
||||
-0.898794046299167040f, -0.891006524188367790f, -0.882947592858926880f,
|
||||
-0.874619707139395740f, -0.866025403784438600f,
|
||||
-0.857167300702112220f, -0.848048096156426070f, -0.838670567945423940f,
|
||||
-0.829037572555041740f, -0.819152044288991800f, -0.809016994374947450f,
|
||||
-0.798635510047292830f, -0.788010753606722010f,
|
||||
-0.777145961456970790f, -0.766044443118978010f, -0.754709580222772010f,
|
||||
-0.743144825477394240f, -0.731353701619170460f, -0.719339800338651080f,
|
||||
-0.707106781186547460f, -0.694658370458997250f,
|
||||
-0.681998360062498480f, -0.669130606358858240f, -0.656059028990507160f,
|
||||
-0.642787609686539250f, -0.629320391049837390f, -0.615661475325658180f,
|
||||
-0.601815023152048270f, -0.587785252292473140f,
|
||||
-0.573576436351046050f, -0.559192903470746900f, -0.544639035015027080f,
|
||||
-0.529919264233204900f, -0.515038074910054160f, -0.499999999999999940f,
|
||||
-0.484809620246337060f, -0.469471562785890810f,
|
||||
-0.453990499739546750f, -0.438371146789077400f, -0.422618261740699440f,
|
||||
-0.406736643075800150f, -0.390731128489273720f, -0.374606593415912010f,
|
||||
-0.358367949545300270f, -0.342020143325668710f,
|
||||
-0.325568154457156640f, -0.309016994374947400f, -0.292371704722736770f,
|
||||
-0.275637355816999160f, -0.258819045102520740f, -0.241921895599667730f,
|
||||
-0.224951054343865000f, -0.207911690817759310f,
|
||||
-0.190808995376544800f, -0.173648177666930330f, -0.156434465040230870f,
|
||||
-0.139173100960065440f, -0.121869343405147480f, -0.104528463267653460f,
|
||||
-0.087155742747658166f, -0.069756473744125302f,
|
||||
-0.052335956242943828f, -0.034899496702500969f, -0.017452406437283512f,
|
||||
0.000000000000000000f, 0.017452406437283512f, 0.034899496702500969f,
|
||||
0.052335956242943828f, 0.069756473744125302f,
|
||||
0.087155742747658166f, 0.104528463267653460f, 0.121869343405147480f,
|
||||
0.139173100960065440f, 0.156434465040230870f, 0.173648177666930330f,
|
||||
0.190808995376544800f, 0.207911690817759310f,
|
||||
0.224951054343865000f, 0.241921895599667730f, 0.258819045102520740f,
|
||||
0.275637355816999160f, 0.292371704722736770f, 0.309016994374947400f,
|
||||
0.325568154457156640f, 0.342020143325668710f,
|
||||
0.358367949545300270f, 0.374606593415912010f, 0.390731128489273720f,
|
||||
0.406736643075800150f, 0.422618261740699440f, 0.438371146789077400f,
|
||||
0.453990499739546750f, 0.469471562785890810f,
|
||||
0.484809620246337060f, 0.499999999999999940f, 0.515038074910054160f,
|
||||
0.529919264233204900f, 0.544639035015027080f, 0.559192903470746900f,
|
||||
0.573576436351046050f, 0.587785252292473140f,
|
||||
0.601815023152048270f, 0.615661475325658180f, 0.629320391049837390f,
|
||||
0.642787609686539250f, 0.656059028990507160f, 0.669130606358858240f,
|
||||
0.681998360062498480f, 0.694658370458997250f,
|
||||
0.707106781186547460f, 0.719339800338651080f, 0.731353701619170460f,
|
||||
0.743144825477394240f, 0.754709580222772010f, 0.766044443118978010f,
|
||||
0.777145961456970790f, 0.788010753606722010f,
|
||||
0.798635510047292830f, 0.809016994374947450f, 0.819152044288991800f,
|
||||
0.829037572555041740f, 0.838670567945423940f, 0.848048096156426070f,
|
||||
0.857167300702112220f, 0.866025403784438600f,
|
||||
0.874619707139395740f, 0.882947592858926880f, 0.891006524188367790f,
|
||||
0.898794046299167040f, 0.906307787036649940f, 0.913545457642600870f,
|
||||
0.920504853452440260f, 0.927183854566787420f,
|
||||
0.933580426497201740f, 0.939692620785908320f, 0.945518575599316740f,
|
||||
0.951056516295153530f, 0.956304755963035440f, 0.961261695938318890f,
|
||||
0.965925826289068310f, 0.970295726275996470f,
|
||||
0.974370064785235250f, 0.978147600733805580f, 0.981627183447663980f,
|
||||
0.984807753012208020f, 0.987688340595137770f, 0.990268068741570250f,
|
||||
0.992546151641321980f, 0.994521895368273290f,
|
||||
0.996194698091745550f, 0.997564050259824200f, 0.998629534754573830f,
|
||||
0.999390827019095760f, 0.999847695156391270f, 1.000000000000000000f,
|
||||
0.999847695156391270f, 0.999390827019095760f,
|
||||
0.998629534754573830f, 0.997564050259824200f, 0.996194698091745550f,
|
||||
0.994521895368273400f, 0.992546151641322090f, 0.990268068741570360f,
|
||||
0.987688340595137660f, 0.984807753012208020f,
|
||||
0.981627183447663980f, 0.978147600733805690f, 0.974370064785235250f,
|
||||
0.970295726275996470f, 0.965925826289068310f, 0.961261695938318890f,
|
||||
0.956304755963035550f, 0.951056516295153640f,
|
||||
0.945518575599316850f, 0.939692620785908430f, 0.933580426497201740f,
|
||||
0.927183854566787420f, 0.920504853452440370f, 0.913545457642600980f,
|
||||
0.906307787036650050f, 0.898794046299166930f,
|
||||
0.891006524188367900f, 0.882947592858927100f, 0.874619707139395850f,
|
||||
0.866025403784438710f, 0.857167300702112330f, 0.848048096156426070f,
|
||||
0.838670567945424050f, 0.829037572555041740f,
|
||||
0.819152044288992020f, 0.809016994374947450f, 0.798635510047292720f,
|
||||
0.788010753606722010f, 0.777145961456971010f, 0.766044443118978010f,
|
||||
0.754709580222771790f, 0.743144825477394240f,
|
||||
0.731353701619170570f, 0.719339800338651410f, 0.707106781186547570f,
|
||||
0.694658370458997140f, 0.681998360062498590f, 0.669130606358858350f,
|
||||
0.656059028990507280f, 0.642787609686539470f,
|
||||
0.629320391049837720f, 0.615661475325658400f, 0.601815023152048160f,
|
||||
0.587785252292473250f, 0.573576436351046380f, 0.559192903470746900f,
|
||||
0.544639035015026860f, 0.529919264233204900f,
|
||||
0.515038074910054380f, 0.499999999999999940f, 0.484809620246337170f,
|
||||
0.469471562785891080f, 0.453990499739546860f, 0.438371146789077290f,
|
||||
0.422618261740699500f, 0.406736643075800430f,
|
||||
0.390731128489274160f, 0.374606593415912240f, 0.358367949545300210f,
|
||||
0.342020143325668880f, 0.325568154457156980f, 0.309016994374947510f,
|
||||
0.292371704722737050f, 0.275637355816999660f,
|
||||
0.258819045102521020f, 0.241921895599667730f, 0.224951054343864780f,
|
||||
0.207911690817759310f, 0.190808995376544970f, 0.173648177666930280f,
|
||||
0.156434465040230980f, 0.139173100960065740f,
|
||||
0.121869343405147550f, 0.104528463267653730f, 0.087155742747658638f,
|
||||
0.069756473744125524f, 0.052335956242943807f, 0.034899496702500699f,
|
||||
0.017452406437283439f, 0.000000000000000122f
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Floating-point sin_cos function.
|
||||
* @param[in] theta input value in degrees
|
||||
* @param[out] *pSinVal points to the processed sine output.
|
||||
* @param[out] *pCosVal points to the processed cos output.
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
|
||||
void arm_sin_cos_f32(
|
||||
float32_t theta,
|
||||
float32_t * pSinVal,
|
||||
float32_t * pCosVal)
|
||||
{
|
||||
int32_t i; /* Index for reading nearwst output values */
|
||||
float32_t x1 = -179.0f; /* Initial input value */
|
||||
float32_t y0, y1; /* nearest output values */
|
||||
float32_t y2, y3;
|
||||
float32_t fract; /* fractional part of input */
|
||||
|
||||
/* Calculation of fractional part */
|
||||
if(theta > 0.0f)
|
||||
{
|
||||
fract = theta - (float32_t) ((int32_t) theta);
|
||||
}
|
||||
else
|
||||
{
|
||||
fract = (theta - (float32_t) ((int32_t) theta)) + 1.0f;
|
||||
}
|
||||
|
||||
/* index calculation for reading nearest output values */
|
||||
i = (uint32_t) (theta - x1);
|
||||
|
||||
/* Checking min and max index of table */
|
||||
if(i < 0)
|
||||
{
|
||||
i = 0;
|
||||
}
|
||||
else if(i >= 359)
|
||||
{
|
||||
i = 358;
|
||||
}
|
||||
|
||||
/* reading nearest sine output values */
|
||||
y0 = sinTable[i];
|
||||
y1 = sinTable[i + 1u];
|
||||
|
||||
/* reading nearest cosine output values */
|
||||
y2 = cosTable[i];
|
||||
y3 = cosTable[i + 1u];
|
||||
|
||||
y1 = y1 - y0;
|
||||
y3 = y3 - y2;
|
||||
|
||||
y1 = fract * y1;
|
||||
y3 = fract * y3;
|
||||
|
||||
/* Calculation of sine value */
|
||||
*pSinVal = y0 + y1;
|
||||
|
||||
/* Calculation of cosine value */
|
||||
*pCosVal = y2 + y3;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of SinCos group
|
||||
*/
|
|
@ -1,324 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sin_cos_q31.c
|
||||
*
|
||||
* Description: Cosine & Sine calculation for Q31 values.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupController
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup SinCos
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* \par
|
||||
* Sine Table is generated from following loop
|
||||
* <pre>for(i = 0; i < 360; i++)
|
||||
* {
|
||||
* sinTable[i]= sin((i-180) * PI/180.0);
|
||||
* } </pre>
|
||||
* Convert above coefficients to fixed point 1.31 format.
|
||||
*/
|
||||
|
||||
static const int32_t sinTableQ31[360] = {
|
||||
|
||||
0x0, 0xfdc41e9b, 0xfb8869ce, 0xf94d0e2e, 0xf7123849, 0xf4d814a4, 0xf29ecfb2,
|
||||
0xf06695da,
|
||||
0xee2f9369, 0xebf9f498, 0xe9c5e582, 0xe7939223, 0xe5632654, 0xe334cdc9,
|
||||
0xe108b40d, 0xdedf047d,
|
||||
0xdcb7ea46, 0xda939061, 0xd8722192, 0xd653c860, 0xd438af17, 0xd220ffc0,
|
||||
0xd00ce422, 0xcdfc85bb,
|
||||
0xcbf00dbe, 0xc9e7a512, 0xc7e3744b, 0xc5e3a3a9, 0xc3e85b18, 0xc1f1c224,
|
||||
0xc0000000, 0xbe133b7c,
|
||||
0xbc2b9b05, 0xba4944a2, 0xb86c5df0, 0xb6950c1e, 0xb4c373ee, 0xb2f7b9af,
|
||||
0xb1320139, 0xaf726def,
|
||||
0xadb922b7, 0xac0641fb, 0xaa59eda4, 0xa8b4471a, 0xa7156f3c, 0xa57d8666,
|
||||
0xa3ecac65, 0xa263007d,
|
||||
0xa0e0a15f, 0x9f65ad2d, 0x9df24175, 0x9c867b2c, 0x9b2276b0, 0x99c64fc5,
|
||||
0x98722192, 0x9726069c,
|
||||
0x95e218c9, 0x94a6715d, 0x937328f5, 0x92485786, 0x9126145f, 0x900c7621,
|
||||
0x8efb92c2, 0x8df37f8b,
|
||||
0x8cf45113, 0x8bfe1b3f, 0x8b10f144, 0x8a2ce59f, 0x89520a1a, 0x88806fc4,
|
||||
0x87b826f7, 0x86f93f50,
|
||||
0x8643c7b3, 0x8597ce46, 0x84f56073, 0x845c8ae3, 0x83cd5982, 0x8347d77b,
|
||||
0x82cc0f36, 0x825a0a5b,
|
||||
0x81f1d1ce, 0x81936daf, 0x813ee55b, 0x80f43f69, 0x80b381ac, 0x807cb130,
|
||||
0x804fd23a, 0x802ce84c,
|
||||
0x8013f61d, 0x8004fda0, 0x80000000, 0x8004fda0, 0x8013f61d, 0x802ce84c,
|
||||
0x804fd23a, 0x807cb130,
|
||||
0x80b381ac, 0x80f43f69, 0x813ee55b, 0x81936daf, 0x81f1d1ce, 0x825a0a5b,
|
||||
0x82cc0f36, 0x8347d77b,
|
||||
0x83cd5982, 0x845c8ae3, 0x84f56073, 0x8597ce46, 0x8643c7b3, 0x86f93f50,
|
||||
0x87b826f7, 0x88806fc4,
|
||||
0x89520a1a, 0x8a2ce59f, 0x8b10f144, 0x8bfe1b3f, 0x8cf45113, 0x8df37f8b,
|
||||
0x8efb92c2, 0x900c7621,
|
||||
0x9126145f, 0x92485786, 0x937328f5, 0x94a6715d, 0x95e218c9, 0x9726069c,
|
||||
0x98722192, 0x99c64fc5,
|
||||
0x9b2276b0, 0x9c867b2c, 0x9df24175, 0x9f65ad2d, 0xa0e0a15f, 0xa263007d,
|
||||
0xa3ecac65, 0xa57d8666,
|
||||
0xa7156f3c, 0xa8b4471a, 0xaa59eda4, 0xac0641fb, 0xadb922b7, 0xaf726def,
|
||||
0xb1320139, 0xb2f7b9af,
|
||||
0xb4c373ee, 0xb6950c1e, 0xb86c5df0, 0xba4944a2, 0xbc2b9b05, 0xbe133b7c,
|
||||
0xc0000000, 0xc1f1c224,
|
||||
0xc3e85b18, 0xc5e3a3a9, 0xc7e3744b, 0xc9e7a512, 0xcbf00dbe, 0xcdfc85bb,
|
||||
0xd00ce422, 0xd220ffc0,
|
||||
0xd438af17, 0xd653c860, 0xd8722192, 0xda939061, 0xdcb7ea46, 0xdedf047d,
|
||||
0xe108b40d, 0xe334cdc9,
|
||||
0xe5632654, 0xe7939223, 0xe9c5e582, 0xebf9f498, 0xee2f9369, 0xf06695da,
|
||||
0xf29ecfb2, 0xf4d814a4,
|
||||
0xf7123849, 0xf94d0e2e, 0xfb8869ce, 0xfdc41e9b, 0x0, 0x23be165, 0x4779632,
|
||||
0x6b2f1d2,
|
||||
0x8edc7b7, 0xb27eb5c, 0xd61304e, 0xf996a26, 0x11d06c97, 0x14060b68,
|
||||
0x163a1a7e, 0x186c6ddd,
|
||||
0x1a9cd9ac, 0x1ccb3237, 0x1ef74bf3, 0x2120fb83, 0x234815ba, 0x256c6f9f,
|
||||
0x278dde6e, 0x29ac37a0,
|
||||
0x2bc750e9, 0x2ddf0040, 0x2ff31bde, 0x32037a45, 0x340ff242, 0x36185aee,
|
||||
0x381c8bb5, 0x3a1c5c57,
|
||||
0x3c17a4e8, 0x3e0e3ddc, 0x40000000, 0x41ecc484, 0x43d464fb, 0x45b6bb5e,
|
||||
0x4793a210, 0x496af3e2,
|
||||
0x4b3c8c12, 0x4d084651, 0x4ecdfec7, 0x508d9211, 0x5246dd49, 0x53f9be05,
|
||||
0x55a6125c, 0x574bb8e6,
|
||||
0x58ea90c4, 0x5a82799a, 0x5c13539b, 0x5d9cff83, 0x5f1f5ea1, 0x609a52d3,
|
||||
0x620dbe8b, 0x637984d4,
|
||||
0x64dd8950, 0x6639b03b, 0x678dde6e, 0x68d9f964, 0x6a1de737, 0x6b598ea3,
|
||||
0x6c8cd70b, 0x6db7a87a,
|
||||
0x6ed9eba1, 0x6ff389df, 0x71046d3e, 0x720c8075, 0x730baeed, 0x7401e4c1,
|
||||
0x74ef0ebc, 0x75d31a61,
|
||||
0x76adf5e6, 0x777f903c, 0x7847d909, 0x7906c0b0, 0x79bc384d, 0x7a6831ba,
|
||||
0x7b0a9f8d, 0x7ba3751d,
|
||||
0x7c32a67e, 0x7cb82885, 0x7d33f0ca, 0x7da5f5a5, 0x7e0e2e32, 0x7e6c9251,
|
||||
0x7ec11aa5, 0x7f0bc097,
|
||||
0x7f4c7e54, 0x7f834ed0, 0x7fb02dc6, 0x7fd317b4, 0x7fec09e3, 0x7ffb0260,
|
||||
0x7fffffff, 0x7ffb0260,
|
||||
0x7fec09e3, 0x7fd317b4, 0x7fb02dc6, 0x7f834ed0, 0x7f4c7e54, 0x7f0bc097,
|
||||
0x7ec11aa5, 0x7e6c9251,
|
||||
0x7e0e2e32, 0x7da5f5a5, 0x7d33f0ca, 0x7cb82885, 0x7c32a67e, 0x7ba3751d,
|
||||
0x7b0a9f8d, 0x7a6831ba,
|
||||
0x79bc384d, 0x7906c0b0, 0x7847d909, 0x777f903c, 0x76adf5e6, 0x75d31a61,
|
||||
0x74ef0ebc, 0x7401e4c1,
|
||||
0x730baeed, 0x720c8075, 0x71046d3e, 0x6ff389df, 0x6ed9eba1, 0x6db7a87a,
|
||||
0x6c8cd70b, 0x6b598ea3,
|
||||
0x6a1de737, 0x68d9f964, 0x678dde6e, 0x6639b03b, 0x64dd8950, 0x637984d4,
|
||||
0x620dbe8b, 0x609a52d3,
|
||||
0x5f1f5ea1, 0x5d9cff83, 0x5c13539b, 0x5a82799a, 0x58ea90c4, 0x574bb8e6,
|
||||
0x55a6125c, 0x53f9be05,
|
||||
0x5246dd49, 0x508d9211, 0x4ecdfec7, 0x4d084651, 0x4b3c8c12, 0x496af3e2,
|
||||
0x4793a210, 0x45b6bb5e,
|
||||
0x43d464fb, 0x41ecc484, 0x40000000, 0x3e0e3ddc, 0x3c17a4e8, 0x3a1c5c57,
|
||||
0x381c8bb5, 0x36185aee,
|
||||
0x340ff242, 0x32037a45, 0x2ff31bde, 0x2ddf0040, 0x2bc750e9, 0x29ac37a0,
|
||||
0x278dde6e, 0x256c6f9f,
|
||||
0x234815ba, 0x2120fb83, 0x1ef74bf3, 0x1ccb3237, 0x1a9cd9ac, 0x186c6ddd,
|
||||
0x163a1a7e, 0x14060b68,
|
||||
0x11d06c97, 0xf996a26, 0xd61304e, 0xb27eb5c, 0x8edc7b7, 0x6b2f1d2,
|
||||
0x4779632, 0x23be165,
|
||||
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* \par
|
||||
* Cosine Table is generated from following loop
|
||||
* <pre>for(i = 0; i < 360; i++)
|
||||
* {
|
||||
* cosTable[i]= cos((i-180) * PI/180.0);
|
||||
* } </pre>
|
||||
* \par
|
||||
* Convert above coefficients to fixed point 1.31 format.
|
||||
*/
|
||||
static const int32_t cosTableQ31[360] = {
|
||||
0x80000000, 0x8004fda0, 0x8013f61d, 0x802ce84c, 0x804fd23a, 0x807cb130,
|
||||
0x80b381ac, 0x80f43f69,
|
||||
0x813ee55b, 0x81936daf, 0x81f1d1ce, 0x825a0a5b, 0x82cc0f36, 0x8347d77b,
|
||||
0x83cd5982, 0x845c8ae3,
|
||||
0x84f56073, 0x8597ce46, 0x8643c7b3, 0x86f93f50, 0x87b826f7, 0x88806fc4,
|
||||
0x89520a1a, 0x8a2ce59f,
|
||||
0x8b10f144, 0x8bfe1b3f, 0x8cf45113, 0x8df37f8b, 0x8efb92c2, 0x900c7621,
|
||||
0x9126145f, 0x92485786,
|
||||
0x937328f5, 0x94a6715d, 0x95e218c9, 0x9726069c, 0x98722192, 0x99c64fc5,
|
||||
0x9b2276b0, 0x9c867b2c,
|
||||
0x9df24175, 0x9f65ad2d, 0xa0e0a15f, 0xa263007d, 0xa3ecac65, 0xa57d8666,
|
||||
0xa7156f3c, 0xa8b4471a,
|
||||
0xaa59eda4, 0xac0641fb, 0xadb922b7, 0xaf726def, 0xb1320139, 0xb2f7b9af,
|
||||
0xb4c373ee, 0xb6950c1e,
|
||||
0xb86c5df0, 0xba4944a2, 0xbc2b9b05, 0xbe133b7c, 0xc0000000, 0xc1f1c224,
|
||||
0xc3e85b18, 0xc5e3a3a9,
|
||||
0xc7e3744b, 0xc9e7a512, 0xcbf00dbe, 0xcdfc85bb, 0xd00ce422, 0xd220ffc0,
|
||||
0xd438af17, 0xd653c860,
|
||||
0xd8722192, 0xda939061, 0xdcb7ea46, 0xdedf047d, 0xe108b40d, 0xe334cdc9,
|
||||
0xe5632654, 0xe7939223,
|
||||
0xe9c5e582, 0xebf9f498, 0xee2f9369, 0xf06695da, 0xf29ecfb2, 0xf4d814a4,
|
||||
0xf7123849, 0xf94d0e2e,
|
||||
0xfb8869ce, 0xfdc41e9b, 0x0, 0x23be165, 0x4779632, 0x6b2f1d2, 0x8edc7b7,
|
||||
0xb27eb5c,
|
||||
0xd61304e, 0xf996a26, 0x11d06c97, 0x14060b68, 0x163a1a7e, 0x186c6ddd,
|
||||
0x1a9cd9ac, 0x1ccb3237,
|
||||
0x1ef74bf3, 0x2120fb83, 0x234815ba, 0x256c6f9f, 0x278dde6e, 0x29ac37a0,
|
||||
0x2bc750e9, 0x2ddf0040,
|
||||
0x2ff31bde, 0x32037a45, 0x340ff242, 0x36185aee, 0x381c8bb5, 0x3a1c5c57,
|
||||
0x3c17a4e8, 0x3e0e3ddc,
|
||||
0x40000000, 0x41ecc484, 0x43d464fb, 0x45b6bb5e, 0x4793a210, 0x496af3e2,
|
||||
0x4b3c8c12, 0x4d084651,
|
||||
0x4ecdfec7, 0x508d9211, 0x5246dd49, 0x53f9be05, 0x55a6125c, 0x574bb8e6,
|
||||
0x58ea90c4, 0x5a82799a,
|
||||
0x5c13539b, 0x5d9cff83, 0x5f1f5ea1, 0x609a52d3, 0x620dbe8b, 0x637984d4,
|
||||
0x64dd8950, 0x6639b03b,
|
||||
0x678dde6e, 0x68d9f964, 0x6a1de737, 0x6b598ea3, 0x6c8cd70b, 0x6db7a87a,
|
||||
0x6ed9eba1, 0x6ff389df,
|
||||
0x71046d3e, 0x720c8075, 0x730baeed, 0x7401e4c1, 0x74ef0ebc, 0x75d31a61,
|
||||
0x76adf5e6, 0x777f903c,
|
||||
0x7847d909, 0x7906c0b0, 0x79bc384d, 0x7a6831ba, 0x7b0a9f8d, 0x7ba3751d,
|
||||
0x7c32a67e, 0x7cb82885,
|
||||
0x7d33f0ca, 0x7da5f5a5, 0x7e0e2e32, 0x7e6c9251, 0x7ec11aa5, 0x7f0bc097,
|
||||
0x7f4c7e54, 0x7f834ed0,
|
||||
0x7fb02dc6, 0x7fd317b4, 0x7fec09e3, 0x7ffb0260, 0x7fffffff, 0x7ffb0260,
|
||||
0x7fec09e3, 0x7fd317b4,
|
||||
0x7fb02dc6, 0x7f834ed0, 0x7f4c7e54, 0x7f0bc097, 0x7ec11aa5, 0x7e6c9251,
|
||||
0x7e0e2e32, 0x7da5f5a5,
|
||||
0x7d33f0ca, 0x7cb82885, 0x7c32a67e, 0x7ba3751d, 0x7b0a9f8d, 0x7a6831ba,
|
||||
0x79bc384d, 0x7906c0b0,
|
||||
0x7847d909, 0x777f903c, 0x76adf5e6, 0x75d31a61, 0x74ef0ebc, 0x7401e4c1,
|
||||
0x730baeed, 0x720c8075,
|
||||
0x71046d3e, 0x6ff389df, 0x6ed9eba1, 0x6db7a87a, 0x6c8cd70b, 0x6b598ea3,
|
||||
0x6a1de737, 0x68d9f964,
|
||||
0x678dde6e, 0x6639b03b, 0x64dd8950, 0x637984d4, 0x620dbe8b, 0x609a52d3,
|
||||
0x5f1f5ea1, 0x5d9cff83,
|
||||
0x5c13539b, 0x5a82799a, 0x58ea90c4, 0x574bb8e6, 0x55a6125c, 0x53f9be05,
|
||||
0x5246dd49, 0x508d9211,
|
||||
0x4ecdfec7, 0x4d084651, 0x4b3c8c12, 0x496af3e2, 0x4793a210, 0x45b6bb5e,
|
||||
0x43d464fb, 0x41ecc484,
|
||||
0x40000000, 0x3e0e3ddc, 0x3c17a4e8, 0x3a1c5c57, 0x381c8bb5, 0x36185aee,
|
||||
0x340ff242, 0x32037a45,
|
||||
0x2ff31bde, 0x2ddf0040, 0x2bc750e9, 0x29ac37a0, 0x278dde6e, 0x256c6f9f,
|
||||
0x234815ba, 0x2120fb83,
|
||||
0x1ef74bf3, 0x1ccb3237, 0x1a9cd9ac, 0x186c6ddd, 0x163a1a7e, 0x14060b68,
|
||||
0x11d06c97, 0xf996a26,
|
||||
0xd61304e, 0xb27eb5c, 0x8edc7b7, 0x6b2f1d2, 0x4779632, 0x23be165, 0x0,
|
||||
0xfdc41e9b,
|
||||
0xfb8869ce, 0xf94d0e2e, 0xf7123849, 0xf4d814a4, 0xf29ecfb2, 0xf06695da,
|
||||
0xee2f9369, 0xebf9f498,
|
||||
0xe9c5e582, 0xe7939223, 0xe5632654, 0xe334cdc9, 0xe108b40d, 0xdedf047d,
|
||||
0xdcb7ea46, 0xda939061,
|
||||
0xd8722192, 0xd653c860, 0xd438af17, 0xd220ffc0, 0xd00ce422, 0xcdfc85bb,
|
||||
0xcbf00dbe, 0xc9e7a512,
|
||||
0xc7e3744b, 0xc5e3a3a9, 0xc3e85b18, 0xc1f1c224, 0xc0000000, 0xbe133b7c,
|
||||
0xbc2b9b05, 0xba4944a2,
|
||||
0xb86c5df0, 0xb6950c1e, 0xb4c373ee, 0xb2f7b9af, 0xb1320139, 0xaf726def,
|
||||
0xadb922b7, 0xac0641fb,
|
||||
0xaa59eda4, 0xa8b4471a, 0xa7156f3c, 0xa57d8666, 0xa3ecac65, 0xa263007d,
|
||||
0xa0e0a15f, 0x9f65ad2d,
|
||||
0x9df24175, 0x9c867b2c, 0x9b2276b0, 0x99c64fc5, 0x98722192, 0x9726069c,
|
||||
0x95e218c9, 0x94a6715d,
|
||||
0x937328f5, 0x92485786, 0x9126145f, 0x900c7621, 0x8efb92c2, 0x8df37f8b,
|
||||
0x8cf45113, 0x8bfe1b3f,
|
||||
0x8b10f144, 0x8a2ce59f, 0x89520a1a, 0x88806fc4, 0x87b826f7, 0x86f93f50,
|
||||
0x8643c7b3, 0x8597ce46,
|
||||
0x84f56073, 0x845c8ae3, 0x83cd5982, 0x8347d77b, 0x82cc0f36, 0x825a0a5b,
|
||||
0x81f1d1ce, 0x81936daf,
|
||||
0x813ee55b, 0x80f43f69, 0x80b381ac, 0x807cb130, 0x804fd23a, 0x802ce84c,
|
||||
0x8013f61d, 0x8004fda0,
|
||||
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Q31 sin_cos function.
|
||||
* @param[in] theta scaled input value in degrees
|
||||
* @param[out] *pSinVal points to the processed sine output.
|
||||
* @param[out] *pCosVal points to the processed cosine output.
|
||||
* @return none.
|
||||
*
|
||||
* The Q31 input value is in the range [-1 0.999999] and is mapped to a degree value in the range [-180 179].
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
void arm_sin_cos_q31(
|
||||
q31_t theta,
|
||||
q31_t * pSinVal,
|
||||
q31_t * pCosVal)
|
||||
{
|
||||
q31_t x0; /* Nearest input value */
|
||||
q31_t y0, y1; /* Nearest output values */
|
||||
q31_t xSpacing = INPUT_SPACING; /* Spaing between inputs */
|
||||
int32_t i; /* Index */
|
||||
q31_t oneByXSpacing; /* 1/ xSpacing value */
|
||||
q31_t out; /* temporary variable */
|
||||
uint32_t sign_bits; /* No.of sign bits */
|
||||
uint32_t firstX = 0x80000000; /* First X value */
|
||||
|
||||
/* Calculation of index */
|
||||
i = ((uint32_t) theta - firstX) / (uint32_t) xSpacing;
|
||||
|
||||
/* Checking min and max index of table */
|
||||
if(i < 0)
|
||||
{
|
||||
i = 0;
|
||||
}
|
||||
else if(i >= 359)
|
||||
{
|
||||
i = 358;
|
||||
}
|
||||
|
||||
/* Calculation of first nearest input value */
|
||||
x0 = (q31_t) firstX + ((q31_t) i * xSpacing);
|
||||
|
||||
/* Reading nearest sine output values from table */
|
||||
y0 = sinTableQ31[i];
|
||||
y1 = sinTableQ31[i + 1u];
|
||||
|
||||
/* Calculation of 1/(x1-x0) */
|
||||
/* (x1-x0) is xSpacing which is fixed value */
|
||||
sign_bits = 8u;
|
||||
oneByXSpacing = 0x5A000000;
|
||||
|
||||
/* Calculation of (theta - x0)/(x1-x0) */
|
||||
out =
|
||||
(((q31_t) (((q63_t) (theta - x0) * oneByXSpacing) >> 32)) << sign_bits);
|
||||
|
||||
/* Calculation of y0 + (y1 - y0) * ((theta - x0)/(x1-x0)) */
|
||||
*pSinVal = __QADD(y0, ((q31_t) (((q63_t) (y1 - y0) * out) >> 30)));
|
||||
|
||||
/* Reading nearest cosine output values from table */
|
||||
y0 = cosTableQ31[i];
|
||||
y1 = cosTableQ31[i + 1u];
|
||||
|
||||
/* Calculation of y0 + (y1 - y0) * ((theta - x0)/(x1-x0)) */
|
||||
*pCosVal = __QADD(y0, ((q31_t) (((q63_t) (y1 - y0) * out) >> 30)));
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of SinCos group
|
||||
*/
|
|
@ -1,280 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cos_f32.c
|
||||
*
|
||||
* Description: Fast cosine calculation for floating-point values.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
/**
|
||||
* @ingroup groupFastMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup cos Cosine
|
||||
*
|
||||
* Computes the trigonometric cosine function using a combination of table lookup
|
||||
* and cubic interpolation. There are separate functions for
|
||||
* Q15, Q31, and floating-point data types.
|
||||
* The input to the floating-point version is in radians while the
|
||||
* fixed-point Q15 and Q31 have a scaled input with the range
|
||||
* [0 +0.9999] mapping to [0 2*pi), Where range excludes 2*pi.
|
||||
*
|
||||
* The implementation is based on table lookup using 256 values together with cubic interpolation.
|
||||
* The steps used are:
|
||||
* -# Calculation of the nearest integer table index
|
||||
* -# Fetch the four table values a, b, c, and d
|
||||
* -# Compute the fractional portion (fract) of the table index.
|
||||
* -# Calculation of wa, wb, wc, wd
|
||||
* -# The final result equals <code>a*wa + b*wb + c*wc + d*wd</code>
|
||||
*
|
||||
* where
|
||||
* <pre>
|
||||
* a=Table[index-1];
|
||||
* b=Table[index+0];
|
||||
* c=Table[index+1];
|
||||
* d=Table[index+2];
|
||||
* </pre>
|
||||
* and
|
||||
* <pre>
|
||||
* wa=-(1/6)*fract.^3 + (1/2)*fract.^2 - (1/3)*fract;
|
||||
* wb=(1/2)*fract.^3 - fract.^2 - (1/2)*fract + 1;
|
||||
* wc=-(1/2)*fract.^3+(1/2)*fract.^2+fract;
|
||||
* wd=(1/6)*fract.^3 - (1/6)*fract;
|
||||
* </pre>
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cos
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* \par
|
||||
* <b>Example code for Generation of Cos Table:</b>
|
||||
* tableSize = 256;
|
||||
* <pre>for(n = -1; n < (tableSize + 2); n++)
|
||||
* {
|
||||
* cosTable[n+1]= cos(2*pi*n/tableSize);
|
||||
* } </pre>
|
||||
* where pi value is 3.14159265358979
|
||||
*/
|
||||
|
||||
static const float32_t cosTable[260] = {
|
||||
0.999698817729949950f, 1.000000000000000000f, 0.999698817729949950f,
|
||||
0.998795449733734130f, 0.997290432453155520f, 0.995184719562530520f,
|
||||
0.992479562759399410f, 0.989176511764526370f,
|
||||
0.985277652740478520f, 0.980785250663757320f, 0.975702106952667240f,
|
||||
0.970031261444091800f, 0.963776051998138430f, 0.956940352916717530f,
|
||||
0.949528157711029050f, 0.941544055938720700f,
|
||||
0.932992815971374510f, 0.923879504203796390f, 0.914209783077239990f,
|
||||
0.903989315032958980f, 0.893224298954010010f, 0.881921291351318360f,
|
||||
0.870086967945098880f, 0.857728600502014160f,
|
||||
0.844853579998016360f, 0.831469595432281490f, 0.817584812641143800f,
|
||||
0.803207516670227050f, 0.788346409797668460f, 0.773010432720184330f,
|
||||
0.757208824157714840f, 0.740951120853424070f,
|
||||
0.724247097969055180f, 0.707106769084930420f, 0.689540565013885500f,
|
||||
0.671558976173400880f, 0.653172850608825680f, 0.634393274784088130f,
|
||||
0.615231573581695560f, 0.595699310302734380f,
|
||||
0.575808167457580570f, 0.555570244789123540f, 0.534997642040252690f,
|
||||
0.514102756977081300f, 0.492898195981979370f, 0.471396744251251220f,
|
||||
0.449611335992813110f, 0.427555084228515630f,
|
||||
0.405241310596466060f, 0.382683426141738890f, 0.359895050525665280f,
|
||||
0.336889863014221190f, 0.313681751489639280f, 0.290284663438797000f,
|
||||
0.266712754964828490f, 0.242980182170867920f,
|
||||
0.219101235270500180f, 0.195090323686599730f, 0.170961886644363400f,
|
||||
0.146730467677116390f, 0.122410677373409270f, 0.098017141222953796f,
|
||||
0.073564566671848297f, 0.049067676067352295f,
|
||||
0.024541229009628296f, 0.000000000000000061f, -0.024541229009628296f,
|
||||
-0.049067676067352295f, -0.073564566671848297f, -0.098017141222953796f,
|
||||
-0.122410677373409270f, -0.146730467677116390f,
|
||||
-0.170961886644363400f, -0.195090323686599730f, -0.219101235270500180f,
|
||||
-0.242980182170867920f, -0.266712754964828490f, -0.290284663438797000f,
|
||||
-0.313681751489639280f, -0.336889863014221190f,
|
||||
-0.359895050525665280f, -0.382683426141738890f, -0.405241310596466060f,
|
||||
-0.427555084228515630f, -0.449611335992813110f, -0.471396744251251220f,
|
||||
-0.492898195981979370f, -0.514102756977081300f,
|
||||
-0.534997642040252690f, -0.555570244789123540f, -0.575808167457580570f,
|
||||
-0.595699310302734380f, -0.615231573581695560f, -0.634393274784088130f,
|
||||
-0.653172850608825680f, -0.671558976173400880f,
|
||||
-0.689540565013885500f, -0.707106769084930420f, -0.724247097969055180f,
|
||||
-0.740951120853424070f, -0.757208824157714840f, -0.773010432720184330f,
|
||||
-0.788346409797668460f, -0.803207516670227050f,
|
||||
-0.817584812641143800f, -0.831469595432281490f, -0.844853579998016360f,
|
||||
-0.857728600502014160f, -0.870086967945098880f, -0.881921291351318360f,
|
||||
-0.893224298954010010f, -0.903989315032958980f,
|
||||
-0.914209783077239990f, -0.923879504203796390f, -0.932992815971374510f,
|
||||
-0.941544055938720700f, -0.949528157711029050f, -0.956940352916717530f,
|
||||
-0.963776051998138430f, -0.970031261444091800f,
|
||||
-0.975702106952667240f, -0.980785250663757320f, -0.985277652740478520f,
|
||||
-0.989176511764526370f, -0.992479562759399410f, -0.995184719562530520f,
|
||||
-0.997290432453155520f, -0.998795449733734130f,
|
||||
-0.999698817729949950f, -1.000000000000000000f, -0.999698817729949950f,
|
||||
-0.998795449733734130f, -0.997290432453155520f, -0.995184719562530520f,
|
||||
-0.992479562759399410f, -0.989176511764526370f,
|
||||
-0.985277652740478520f, -0.980785250663757320f, -0.975702106952667240f,
|
||||
-0.970031261444091800f, -0.963776051998138430f, -0.956940352916717530f,
|
||||
-0.949528157711029050f, -0.941544055938720700f,
|
||||
-0.932992815971374510f, -0.923879504203796390f, -0.914209783077239990f,
|
||||
-0.903989315032958980f, -0.893224298954010010f, -0.881921291351318360f,
|
||||
-0.870086967945098880f, -0.857728600502014160f,
|
||||
-0.844853579998016360f, -0.831469595432281490f, -0.817584812641143800f,
|
||||
-0.803207516670227050f, -0.788346409797668460f, -0.773010432720184330f,
|
||||
-0.757208824157714840f, -0.740951120853424070f,
|
||||
-0.724247097969055180f, -0.707106769084930420f, -0.689540565013885500f,
|
||||
-0.671558976173400880f, -0.653172850608825680f, -0.634393274784088130f,
|
||||
-0.615231573581695560f, -0.595699310302734380f,
|
||||
-0.575808167457580570f, -0.555570244789123540f, -0.534997642040252690f,
|
||||
-0.514102756977081300f, -0.492898195981979370f, -0.471396744251251220f,
|
||||
-0.449611335992813110f, -0.427555084228515630f,
|
||||
-0.405241310596466060f, -0.382683426141738890f, -0.359895050525665280f,
|
||||
-0.336889863014221190f, -0.313681751489639280f, -0.290284663438797000f,
|
||||
-0.266712754964828490f, -0.242980182170867920f,
|
||||
-0.219101235270500180f, -0.195090323686599730f, -0.170961886644363400f,
|
||||
-0.146730467677116390f, -0.122410677373409270f, -0.098017141222953796f,
|
||||
-0.073564566671848297f, -0.049067676067352295f,
|
||||
-0.024541229009628296f, -0.000000000000000184f, 0.024541229009628296f,
|
||||
0.049067676067352295f, 0.073564566671848297f, 0.098017141222953796f,
|
||||
0.122410677373409270f, 0.146730467677116390f,
|
||||
0.170961886644363400f, 0.195090323686599730f, 0.219101235270500180f,
|
||||
0.242980182170867920f, 0.266712754964828490f, 0.290284663438797000f,
|
||||
0.313681751489639280f, 0.336889863014221190f,
|
||||
0.359895050525665280f, 0.382683426141738890f, 0.405241310596466060f,
|
||||
0.427555084228515630f, 0.449611335992813110f, 0.471396744251251220f,
|
||||
0.492898195981979370f, 0.514102756977081300f,
|
||||
0.534997642040252690f, 0.555570244789123540f, 0.575808167457580570f,
|
||||
0.595699310302734380f, 0.615231573581695560f, 0.634393274784088130f,
|
||||
0.653172850608825680f, 0.671558976173400880f,
|
||||
0.689540565013885500f, 0.707106769084930420f, 0.724247097969055180f,
|
||||
0.740951120853424070f, 0.757208824157714840f, 0.773010432720184330f,
|
||||
0.788346409797668460f, 0.803207516670227050f,
|
||||
0.817584812641143800f, 0.831469595432281490f, 0.844853579998016360f,
|
||||
0.857728600502014160f, 0.870086967945098880f, 0.881921291351318360f,
|
||||
0.893224298954010010f, 0.903989315032958980f,
|
||||
0.914209783077239990f, 0.923879504203796390f, 0.932992815971374510f,
|
||||
0.941544055938720700f, 0.949528157711029050f, 0.956940352916717530f,
|
||||
0.963776051998138430f, 0.970031261444091800f,
|
||||
0.975702106952667240f, 0.980785250663757320f, 0.985277652740478520f,
|
||||
0.989176511764526370f, 0.992479562759399410f, 0.995184719562530520f,
|
||||
0.997290432453155520f, 0.998795449733734130f,
|
||||
0.999698817729949950f, 1.000000000000000000f, 0.999698817729949950f,
|
||||
0.998795449733734130f
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Fast approximation to the trigonometric cosine function for floating-point data.
|
||||
* @param[in] x input value in radians.
|
||||
* @return cos(x).
|
||||
*/
|
||||
|
||||
|
||||
float32_t arm_cos_f32(
|
||||
float32_t x)
|
||||
{
|
||||
float32_t cosVal, fract, in;
|
||||
int32_t index;
|
||||
uint32_t tableSize = (uint32_t) TABLE_SIZE;
|
||||
float32_t wa, wb, wc, wd;
|
||||
float32_t a, b, c, d;
|
||||
float32_t *tablePtr;
|
||||
int32_t n;
|
||||
float32_t fractsq, fractby2, fractby6, fractby3, fractsqby2;
|
||||
float32_t oneminusfractby2;
|
||||
float32_t frby2xfrsq, frby6xfrsq;
|
||||
|
||||
/* input x is in radians */
|
||||
/* Scale the input to [0 1] range from [0 2*PI] , divide input by 2*pi */
|
||||
in = x * 0.159154943092f;
|
||||
|
||||
/* Calculation of floor value of input */
|
||||
n = (int32_t) in;
|
||||
|
||||
/* Make negative values towards -infinity */
|
||||
if(x < 0.0f)
|
||||
{
|
||||
n = n - 1;
|
||||
}
|
||||
|
||||
/* Map input value to [0 1] */
|
||||
in = in - (float32_t) n;
|
||||
|
||||
/* Calculation of index of the table */
|
||||
index = (uint32_t) (tableSize * in);
|
||||
|
||||
/* fractional value calculation */
|
||||
fract = ((float32_t) tableSize * in) - (float32_t) index;
|
||||
|
||||
/* Checking min and max index of table */
|
||||
if(index < 0)
|
||||
{
|
||||
index = 0;
|
||||
}
|
||||
else if(index > 256)
|
||||
{
|
||||
index = 256;
|
||||
}
|
||||
|
||||
/* Initialise table pointer */
|
||||
tablePtr = (float32_t *) & cosTable[index];
|
||||
|
||||
/* Read four nearest values of input value from the cos table */
|
||||
a = tablePtr[0];
|
||||
b = tablePtr[1];
|
||||
c = tablePtr[2];
|
||||
d = tablePtr[3];
|
||||
|
||||
/* Cubic interpolation process */
|
||||
fractsq = fract * fract;
|
||||
fractby2 = fract * 0.5f;
|
||||
fractby6 = fract * 0.166666667f;
|
||||
fractby3 = fract * 0.3333333333333f;
|
||||
fractsqby2 = fractsq * 0.5f;
|
||||
frby2xfrsq = (fractby2) * fractsq;
|
||||
frby6xfrsq = (fractby6) * fractsq;
|
||||
oneminusfractby2 = 1.0f - fractby2;
|
||||
wb = fractsqby2 - fractby3;
|
||||
wc = (fractsqby2 + fract);
|
||||
wa = wb - frby6xfrsq;
|
||||
wb = frby2xfrsq - fractsq;
|
||||
cosVal = wa * a;
|
||||
wc = wc - frby2xfrsq;
|
||||
wd = (frby6xfrsq) - fractby6;
|
||||
wb = wb + oneminusfractby2;
|
||||
|
||||
/* Calculate cos value */
|
||||
cosVal = (cosVal + (b * wb)) + ((c * wc) + (d * wd));
|
||||
|
||||
/* Return the output value */
|
||||
return (cosVal);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cos group
|
||||
*/
|
|
@ -1,205 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cos_q15.c
|
||||
*
|
||||
* Description: Fast cosine calculation for Q15 values.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFastMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cos
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* \par
|
||||
* Table Values are in Q15(1.15 Fixed point format) and generation is done in three steps
|
||||
* \par
|
||||
* First Generate cos values in floating point:
|
||||
* tableSize = 256;
|
||||
* <pre>for(n = -1; n < (tableSize + 1); n++)
|
||||
* {
|
||||
* cosTable[n+1]= cos(2*pi*n/tableSize);
|
||||
* }</pre>
|
||||
* where pi value is 3.14159265358979
|
||||
* \par
|
||||
* Secondly Convert Floating point to Q15(Fixed point):
|
||||
* (cosTable[i] * pow(2, 15))
|
||||
* \par
|
||||
* Finally Rounding to nearest integer is done
|
||||
* cosTable[i] += (cosTable[i] > 0 ? 0.5 :-0.5);
|
||||
*/
|
||||
|
||||
static const q15_t cosTableQ15[259] = {
|
||||
0x7ff6, 0x7fff, 0x7ff6, 0x7fd9, 0x7fa7, 0x7f62, 0x7f0a, 0x7e9d,
|
||||
0x7e1e, 0x7d8a, 0x7ce4, 0x7c2a, 0x7b5d, 0x7a7d, 0x798a, 0x7885,
|
||||
0x776c, 0x7642, 0x7505, 0x73b6, 0x7255, 0x70e3, 0x6f5f, 0x6dca,
|
||||
0x6c24, 0x6a6e, 0x68a7, 0x66d0, 0x64e9, 0x62f2, 0x60ec, 0x5ed7,
|
||||
0x5cb4, 0x5a82, 0x5843, 0x55f6, 0x539b, 0x5134, 0x4ec0, 0x4c40,
|
||||
0x49b4, 0x471d, 0x447b, 0x41ce, 0x3f17, 0x3c57, 0x398d, 0x36ba,
|
||||
0x33df, 0x30fc, 0x2e11, 0x2b1f, 0x2827, 0x2528, 0x2224, 0x1f1a,
|
||||
0x1c0c, 0x18f9, 0x15e2, 0x12c8, 0xfab, 0xc8c, 0x96b, 0x648,
|
||||
0x324, 0x0, 0xfcdc, 0xf9b8, 0xf695, 0xf374, 0xf055, 0xed38,
|
||||
0xea1e, 0xe707, 0xe3f4, 0xe0e6, 0xdddc, 0xdad8, 0xd7d9, 0xd4e1,
|
||||
0xd1ef, 0xcf04, 0xcc21, 0xc946, 0xc673, 0xc3a9, 0xc0e9, 0xbe32,
|
||||
0xbb85, 0xb8e3, 0xb64c, 0xb3c0, 0xb140, 0xaecc, 0xac65, 0xaa0a,
|
||||
0xa7bd, 0xa57e, 0xa34c, 0xa129, 0x9f14, 0x9d0e, 0x9b17, 0x9930,
|
||||
0x9759, 0x9592, 0x93dc, 0x9236, 0x90a1, 0x8f1d, 0x8dab, 0x8c4a,
|
||||
0x8afb, 0x89be, 0x8894, 0x877b, 0x8676, 0x8583, 0x84a3, 0x83d6,
|
||||
0x831c, 0x8276, 0x81e2, 0x8163, 0x80f6, 0x809e, 0x8059, 0x8027,
|
||||
0x800a, 0x8000, 0x800a, 0x8027, 0x8059, 0x809e, 0x80f6, 0x8163,
|
||||
0x81e2, 0x8276, 0x831c, 0x83d6, 0x84a3, 0x8583, 0x8676, 0x877b,
|
||||
0x8894, 0x89be, 0x8afb, 0x8c4a, 0x8dab, 0x8f1d, 0x90a1, 0x9236,
|
||||
0x93dc, 0x9592, 0x9759, 0x9930, 0x9b17, 0x9d0e, 0x9f14, 0xa129,
|
||||
0xa34c, 0xa57e, 0xa7bd, 0xaa0a, 0xac65, 0xaecc, 0xb140, 0xb3c0,
|
||||
0xb64c, 0xb8e3, 0xbb85, 0xbe32, 0xc0e9, 0xc3a9, 0xc673, 0xc946,
|
||||
0xcc21, 0xcf04, 0xd1ef, 0xd4e1, 0xd7d9, 0xdad8, 0xdddc, 0xe0e6,
|
||||
0xe3f4, 0xe707, 0xea1e, 0xed38, 0xf055, 0xf374, 0xf695, 0xf9b8,
|
||||
0xfcdc, 0x0, 0x324, 0x648, 0x96b, 0xc8c, 0xfab, 0x12c8,
|
||||
0x15e2, 0x18f9, 0x1c0c, 0x1f1a, 0x2224, 0x2528, 0x2827, 0x2b1f,
|
||||
0x2e11, 0x30fc, 0x33df, 0x36ba, 0x398d, 0x3c57, 0x3f17, 0x41ce,
|
||||
0x447b, 0x471d, 0x49b4, 0x4c40, 0x4ec0, 0x5134, 0x539b, 0x55f6,
|
||||
0x5843, 0x5a82, 0x5cb4, 0x5ed7, 0x60ec, 0x62f2, 0x64e9, 0x66d0,
|
||||
0x68a7, 0x6a6e, 0x6c24, 0x6dca, 0x6f5f, 0x70e3, 0x7255, 0x73b6,
|
||||
0x7505, 0x7642, 0x776c, 0x7885, 0x798a, 0x7a7d, 0x7b5d, 0x7c2a,
|
||||
0x7ce4, 0x7d8a, 0x7e1e, 0x7e9d, 0x7f0a, 0x7f62, 0x7fa7, 0x7fd9,
|
||||
0x7ff6, 0x7fff, 0x7ff6
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Fast approximation to the trigonometric cosine function for Q15 data.
|
||||
* @param[in] x Scaled input value in radians.
|
||||
* @return cos(x).
|
||||
*
|
||||
* The Q15 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*pi), Here range excludes 2*pi.
|
||||
*/
|
||||
|
||||
q15_t arm_cos_q15(
|
||||
q15_t x)
|
||||
{
|
||||
q31_t cosVal; /* Temporary variable for output */
|
||||
q15_t *tablePtr; /* Pointer to table */
|
||||
q15_t in, in2; /* Temporary variables for input */
|
||||
q31_t wa, wb, wc, wd; /* Cubic interpolation coefficients */
|
||||
q15_t a, b, c, d; /* Four nearest output values */
|
||||
q15_t fract, fractCube, fractSquare; /* Variables for fractional value */
|
||||
q15_t oneBy6 = 0x1555; /* Fixed point value of 1/6 */
|
||||
q15_t tableSpacing = TABLE_SPACING_Q15; /* Table spacing */
|
||||
int32_t index; /* Index variable */
|
||||
|
||||
in = x;
|
||||
|
||||
/* Calculate the nearest index */
|
||||
index = (int32_t) in / tableSpacing;
|
||||
|
||||
/* Calculate the nearest value of input */
|
||||
in2 = (q15_t) index *tableSpacing;
|
||||
|
||||
/* Calculation of fractional value */
|
||||
fract = (in - in2) << 8;
|
||||
|
||||
/* fractSquare = fract * fract */
|
||||
fractSquare = (q15_t) ((fract * fract) >> 15);
|
||||
|
||||
/* fractCube = fract * fract * fract */
|
||||
fractCube = (q15_t) ((fractSquare * fract) >> 15);
|
||||
|
||||
/* Checking min and max index of table */
|
||||
if(index < 0)
|
||||
{
|
||||
index = 0;
|
||||
}
|
||||
else if(index > 256)
|
||||
{
|
||||
index = 256;
|
||||
}
|
||||
|
||||
/* Initialise table pointer */
|
||||
tablePtr = (q15_t *) & cosTableQ15[index];
|
||||
|
||||
/* Cubic interpolation process */
|
||||
/* Calculation of wa */
|
||||
/* wa = -(oneBy6)*fractCube + (fractSquare >> 1u) - (0x2AAA)*fract; */
|
||||
wa = (q31_t) oneBy6 *fractCube;
|
||||
wa += (q31_t) 0x2AAA *fract;
|
||||
wa = -(wa >> 15);
|
||||
wa += (fractSquare >> 1u);
|
||||
|
||||
/* Read first nearest value of output from the cos table */
|
||||
a = *tablePtr++;
|
||||
|
||||
/* cosVal = a * wa */
|
||||
cosVal = a * wa;
|
||||
|
||||
/* Calculation of wb */
|
||||
wb = (((fractCube >> 1u) - fractSquare) - (fract >> 1u)) + 0x7FFF;
|
||||
|
||||
/* Read second nearest value of output from the cos table */
|
||||
b = *tablePtr++;
|
||||
|
||||
/* cosVal += b*wb */
|
||||
cosVal += b * wb;
|
||||
|
||||
/* Calculation of wc */
|
||||
wc = -(q31_t) fractCube + fractSquare;
|
||||
wc = (wc >> 1u) + fract;
|
||||
|
||||
/* Read third nearest value of output from the cos table */
|
||||
c = *tablePtr++;
|
||||
|
||||
/* cosVal += c*wc */
|
||||
cosVal += c * wc;
|
||||
|
||||
/* Calculation of wd */
|
||||
/* wd = (oneBy6)*fractCube - (oneBy6)*fract; */
|
||||
fractCube = fractCube - fract;
|
||||
wd = ((q15_t) (((q31_t) oneBy6 * fractCube) >> 15));
|
||||
|
||||
/* Read fourth nearest value of output from the cos table */
|
||||
d = *tablePtr++;
|
||||
|
||||
/* cosVal += d*wd; */
|
||||
cosVal += d * wd;
|
||||
|
||||
/* Convert output value in 1.15(q15) format and saturate */
|
||||
cosVal = __SSAT((cosVal >> 15), 16);
|
||||
|
||||
/* Return the output value in 1.15(q15) format */
|
||||
return ((q15_t) cosVal);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cos group
|
||||
*/
|
|
@ -1,239 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cos_q31.c
|
||||
*
|
||||
* Description: Fast cosine calculation for Q31 values.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFastMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup cos
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* \par
|
||||
* Table Values are in Q31(1.31 Fixed point format) and generation is done in three steps
|
||||
* First Generate cos values in floating point:
|
||||
* tableSize = 256;
|
||||
* <pre>for(n = -1; n < (tableSize + 1); n++)
|
||||
* {
|
||||
* cosTable[n+1]= cos(2*pi*n/tableSize);
|
||||
* } </pre>
|
||||
* where pi value is 3.14159265358979
|
||||
* \par
|
||||
* Secondly Convert Floating point to Q31(Fixed point):
|
||||
* (cosTable[i] * pow(2, 31))
|
||||
* \par
|
||||
* Finally Rounding to nearest integer is done
|
||||
* cosTable[i] += (cosTable[i] > 0 ? 0.5 :-0.5);
|
||||
*/
|
||||
|
||||
|
||||
static const q31_t cosTableQ31[259] = {
|
||||
0x7ff62182, 0x7fffffff, 0x7ff62182, 0x7fd8878e, 0x7fa736b4, 0x7f62368f,
|
||||
0x7f0991c4, 0x7e9d55fc,
|
||||
0x7e1d93ea, 0x7d8a5f40, 0x7ce3ceb2, 0x7c29fbee, 0x7b5d039e, 0x7a7d055b,
|
||||
0x798a23b1, 0x78848414,
|
||||
0x776c4edb, 0x7641af3d, 0x7504d345, 0x73b5ebd1, 0x72552c85, 0x70e2cbc6,
|
||||
0x6f5f02b2, 0x6dca0d14,
|
||||
0x6c242960, 0x6a6d98a4, 0x68a69e81, 0x66cf8120, 0x64e88926, 0x62f201ac,
|
||||
0x60ec3830, 0x5ed77c8a,
|
||||
0x5cb420e0, 0x5a82799a, 0x5842dd54, 0x55f5a4d2, 0x539b2af0, 0x5133cc94,
|
||||
0x4ebfe8a5, 0x4c3fdff4,
|
||||
0x49b41533, 0x471cece7, 0x447acd50, 0x41ce1e65, 0x3f1749b8, 0x3c56ba70,
|
||||
0x398cdd32, 0x36ba2014,
|
||||
0x33def287, 0x30fbc54d, 0x2e110a62, 0x2b1f34eb, 0x2826b928, 0x25280c5e,
|
||||
0x2223a4c5, 0x1f19f97b,
|
||||
0x1c0b826a, 0x18f8b83c, 0x15e21445, 0x12c8106f, 0xfab272b, 0xc8bd35e,
|
||||
0x96a9049, 0x647d97c,
|
||||
0x3242abf, 0x0, 0xfcdbd541, 0xf9b82684, 0xf6956fb7, 0xf3742ca2, 0xf054d8d5,
|
||||
0xed37ef91,
|
||||
0xea1debbb, 0xe70747c4, 0xe3f47d96, 0xe0e60685, 0xdddc5b3b, 0xdad7f3a2,
|
||||
0xd7d946d8, 0xd4e0cb15,
|
||||
0xd1eef59e, 0xcf043ab3, 0xcc210d79, 0xc945dfec, 0xc67322ce, 0xc3a94590,
|
||||
0xc0e8b648, 0xbe31e19b,
|
||||
0xbb8532b0, 0xb8e31319, 0xb64beacd, 0xb3c0200c, 0xb140175b, 0xaecc336c,
|
||||
0xac64d510, 0xaa0a5b2e,
|
||||
0xa7bd22ac, 0xa57d8666, 0xa34bdf20, 0xa1288376, 0x9f13c7d0, 0x9d0dfe54,
|
||||
0x9b1776da, 0x99307ee0,
|
||||
0x9759617f, 0x9592675c, 0x93dbd6a0, 0x9235f2ec, 0x90a0fd4e, 0x8f1d343a,
|
||||
0x8daad37b, 0x8c4a142f,
|
||||
0x8afb2cbb, 0x89be50c3, 0x8893b125, 0x877b7bec, 0x8675dc4f, 0x8582faa5,
|
||||
0x84a2fc62, 0x83d60412,
|
||||
0x831c314e, 0x8275a0c0, 0x81e26c16, 0x8162aa04, 0x80f66e3c, 0x809dc971,
|
||||
0x8058c94c, 0x80277872,
|
||||
0x8009de7e, 0x80000000, 0x8009de7e, 0x80277872, 0x8058c94c, 0x809dc971,
|
||||
0x80f66e3c, 0x8162aa04,
|
||||
0x81e26c16, 0x8275a0c0, 0x831c314e, 0x83d60412, 0x84a2fc62, 0x8582faa5,
|
||||
0x8675dc4f, 0x877b7bec,
|
||||
0x8893b125, 0x89be50c3, 0x8afb2cbb, 0x8c4a142f, 0x8daad37b, 0x8f1d343a,
|
||||
0x90a0fd4e, 0x9235f2ec,
|
||||
0x93dbd6a0, 0x9592675c, 0x9759617f, 0x99307ee0, 0x9b1776da, 0x9d0dfe54,
|
||||
0x9f13c7d0, 0xa1288376,
|
||||
0xa34bdf20, 0xa57d8666, 0xa7bd22ac, 0xaa0a5b2e, 0xac64d510, 0xaecc336c,
|
||||
0xb140175b, 0xb3c0200c,
|
||||
0xb64beacd, 0xb8e31319, 0xbb8532b0, 0xbe31e19b, 0xc0e8b648, 0xc3a94590,
|
||||
0xc67322ce, 0xc945dfec,
|
||||
0xcc210d79, 0xcf043ab3, 0xd1eef59e, 0xd4e0cb15, 0xd7d946d8, 0xdad7f3a2,
|
||||
0xdddc5b3b, 0xe0e60685,
|
||||
0xe3f47d96, 0xe70747c4, 0xea1debbb, 0xed37ef91, 0xf054d8d5, 0xf3742ca2,
|
||||
0xf6956fb7, 0xf9b82684,
|
||||
0xfcdbd541, 0x0, 0x3242abf, 0x647d97c, 0x96a9049, 0xc8bd35e, 0xfab272b,
|
||||
0x12c8106f,
|
||||
0x15e21445, 0x18f8b83c, 0x1c0b826a, 0x1f19f97b, 0x2223a4c5, 0x25280c5e,
|
||||
0x2826b928, 0x2b1f34eb,
|
||||
0x2e110a62, 0x30fbc54d, 0x33def287, 0x36ba2014, 0x398cdd32, 0x3c56ba70,
|
||||
0x3f1749b8, 0x41ce1e65,
|
||||
0x447acd50, 0x471cece7, 0x49b41533, 0x4c3fdff4, 0x4ebfe8a5, 0x5133cc94,
|
||||
0x539b2af0, 0x55f5a4d2,
|
||||
0x5842dd54, 0x5a82799a, 0x5cb420e0, 0x5ed77c8a, 0x60ec3830, 0x62f201ac,
|
||||
0x64e88926, 0x66cf8120,
|
||||
0x68a69e81, 0x6a6d98a4, 0x6c242960, 0x6dca0d14, 0x6f5f02b2, 0x70e2cbc6,
|
||||
0x72552c85, 0x73b5ebd1,
|
||||
0x7504d345, 0x7641af3d, 0x776c4edb, 0x78848414, 0x798a23b1, 0x7a7d055b,
|
||||
0x7b5d039e, 0x7c29fbee,
|
||||
0x7ce3ceb2, 0x7d8a5f40, 0x7e1d93ea, 0x7e9d55fc, 0x7f0991c4, 0x7f62368f,
|
||||
0x7fa736b4, 0x7fd8878e,
|
||||
0x7ff62182, 0x7fffffff, 0x7ff62182
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Fast approximation to the trigonometric cosine function for Q31 data.
|
||||
* @param[in] x Scaled input value in radians.
|
||||
* @return cos(x).
|
||||
*
|
||||
* The Q31 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*pi), Here range excludes 2*pi.
|
||||
*/
|
||||
|
||||
q31_t arm_cos_q31(
|
||||
q31_t x)
|
||||
{
|
||||
q31_t cosVal, in, in2; /* Temporary variables for input, output */
|
||||
q31_t wa, wb, wc, wd; /* Cubic interpolation coefficients */
|
||||
q31_t a, b, c, d; /* Four nearest output values */
|
||||
q31_t *tablePtr; /* Pointer to table */
|
||||
q31_t fract, fractCube, fractSquare; /* Temporary values for fractional values */
|
||||
q31_t oneBy6 = 0x15555555; /* Fixed point value of 1/6 */
|
||||
q31_t tableSpacing = TABLE_SPACING_Q31; /* Table spacing */
|
||||
q31_t temp; /* Temporary variable for intermediate process */
|
||||
int32_t index; /* Index variable */
|
||||
|
||||
in = x;
|
||||
|
||||
/* Calculate the nearest index */
|
||||
index = in / tableSpacing;
|
||||
|
||||
/* Calculate the nearest value of input */
|
||||
in2 = ((q31_t) index) * tableSpacing;
|
||||
|
||||
/* Calculation of fractional value */
|
||||
fract = (in - in2) << 8;
|
||||
|
||||
/* fractSquare = fract * fract */
|
||||
fractSquare = ((q31_t) (((q63_t) fract * fract) >> 32));
|
||||
fractSquare = fractSquare << 1;
|
||||
|
||||
/* fractCube = fract * fract * fract */
|
||||
fractCube = ((q31_t) (((q63_t) fractSquare * fract) >> 32));
|
||||
fractCube = fractCube << 1;
|
||||
|
||||
/* Checking min and max index of table */
|
||||
if(index < 0)
|
||||
{
|
||||
index = 0;
|
||||
}
|
||||
else if(index > 256)
|
||||
{
|
||||
index = 256;
|
||||
}
|
||||
|
||||
/* Initialise table pointer */
|
||||
tablePtr = (q31_t *) & cosTableQ31[index];
|
||||
|
||||
/* Cubic interpolation process */
|
||||
/* Calculation of wa */
|
||||
/* wa = -(oneBy6)*fractCube + (fractSquare >> 1u) - (0x2AAAAAAA)*fract; */
|
||||
wa = ((q31_t) (((q63_t) oneBy6 * fractCube) >> 32));
|
||||
temp = 0x2AAAAAAA;
|
||||
wa = (q31_t) ((((q63_t) wa << 32) + ((q63_t) temp * fract)) >> 32);
|
||||
wa = -(wa << 1u);
|
||||
wa += (fractSquare >> 1u);
|
||||
|
||||
/* Read first nearest value of output from the cos table */
|
||||
a = *tablePtr++;
|
||||
|
||||
/* cosVal = a*wa */
|
||||
cosVal = ((q31_t) (((q63_t) a * wa) >> 32));
|
||||
|
||||
/* q31(1.31) Fixed point value of 1 */
|
||||
temp = 0x7FFFFFFF;
|
||||
|
||||
/* Calculation of wb */
|
||||
wb = ((fractCube >> 1u) - (fractSquare + (fract >> 1u))) + temp;
|
||||
/* Read second nearest value of output from the cos table */
|
||||
b = *tablePtr++;
|
||||
|
||||
/* cosVal += b*wb */
|
||||
cosVal = (q31_t) ((((q63_t) cosVal << 32) + ((q63_t) b * (wb))) >> 32);
|
||||
|
||||
/* Calculation of wc */
|
||||
wc = -fractCube + fractSquare;
|
||||
wc = (wc >> 1u) + fract;
|
||||
/* Read third nearest values of output value from the cos table */
|
||||
c = *tablePtr++;
|
||||
|
||||
/* cosVal += c*wc */
|
||||
cosVal = (q31_t) ((((q63_t) cosVal << 32) + ((q63_t) c * (wc))) >> 32);
|
||||
|
||||
/* Calculation of wd */
|
||||
/* wd = (oneBy6)*fractCube - (oneBy6)*fract; */
|
||||
fractCube = fractCube - fract;
|
||||
wd = ((q31_t) (((q63_t) oneBy6 * fractCube) >> 32));
|
||||
wd = (wd << 1u);
|
||||
|
||||
/* Read fourth nearest value of output from the cos table */
|
||||
d = *tablePtr++;
|
||||
|
||||
/* cosVal += d*wd; */
|
||||
cosVal = (q31_t) ((((q63_t) cosVal << 32) + ((q63_t) d * (wd))) >> 32);
|
||||
|
||||
|
||||
/* convert cosVal in 2.30 format to 1.31 format */
|
||||
return (__QADD(cosVal, cosVal));
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of cos group
|
||||
*/
|
|
@ -1,281 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sin_f32.c
|
||||
*
|
||||
* Description: Fast sine calculation for floating-point values.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFastMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup sin Sine
|
||||
*
|
||||
* Computes the trigonometric sine function using a combination of table lookup
|
||||
* and cubic interpolation. There are separate functions for
|
||||
* Q15, Q31, and floating-point data types.
|
||||
* The input to the floating-point version is in radians while the
|
||||
* fixed-point Q15 and Q31 have a scaled input with the range
|
||||
* [0 +0.9999] mapping to [0 2*pi), Where range excludes 2*pi.
|
||||
*
|
||||
* The implementation is based on table lookup using 256 values together with cubic interpolation.
|
||||
* The steps used are:
|
||||
* -# Calculation of the nearest integer table index
|
||||
* -# Fetch the four table values a, b, c, and d
|
||||
* -# Compute the fractional portion (fract) of the table index.
|
||||
* -# Calculation of wa, wb, wc, wd
|
||||
* -# The final result equals <code>a*wa + b*wb + c*wc + d*wd</code>
|
||||
*
|
||||
* where
|
||||
* <pre>
|
||||
* a=Table[index-1];
|
||||
* b=Table[index+0];
|
||||
* c=Table[index+1];
|
||||
* d=Table[index+2];
|
||||
* </pre>
|
||||
* and
|
||||
* <pre>
|
||||
* wa=-(1/6)*fract.^3 + (1/2)*fract.^2 - (1/3)*fract;
|
||||
* wb=(1/2)*fract.^3 - fract.^2 - (1/2)*fract + 1;
|
||||
* wc=-(1/2)*fract.^3+(1/2)*fract.^2+fract;
|
||||
* wd=(1/6)*fract.^3 - (1/6)*fract;
|
||||
* </pre>
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup sin
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* \par
|
||||
* Example code for Generation of Floating-point Sin Table:
|
||||
* tableSize = 256;
|
||||
* <pre>for(n = -1; n < (tableSize + 1); n++)
|
||||
* {
|
||||
* sinTable[n+1]=sin(2*pi*n/tableSize);
|
||||
* }</pre>
|
||||
* \par
|
||||
* where pi value is 3.14159265358979
|
||||
*/
|
||||
|
||||
static const float32_t sinTable[259] = {
|
||||
-0.024541229009628296f, 0.000000000000000000f, 0.024541229009628296f,
|
||||
0.049067676067352295f, 0.073564566671848297f, 0.098017141222953796f,
|
||||
0.122410677373409270f, 0.146730467677116390f,
|
||||
0.170961886644363400f, 0.195090323686599730f, 0.219101235270500180f,
|
||||
0.242980182170867920f, 0.266712754964828490f, 0.290284663438797000f,
|
||||
0.313681751489639280f, 0.336889863014221190f,
|
||||
0.359895050525665280f, 0.382683426141738890f, 0.405241310596466060f,
|
||||
0.427555084228515630f, 0.449611335992813110f, 0.471396744251251220f,
|
||||
0.492898195981979370f, 0.514102756977081300f,
|
||||
0.534997642040252690f, 0.555570244789123540f, 0.575808167457580570f,
|
||||
0.595699310302734380f, 0.615231573581695560f, 0.634393274784088130f,
|
||||
0.653172850608825680f, 0.671558976173400880f,
|
||||
0.689540565013885500f, 0.707106769084930420f, 0.724247097969055180f,
|
||||
0.740951120853424070f, 0.757208824157714840f, 0.773010432720184330f,
|
||||
0.788346409797668460f, 0.803207516670227050f,
|
||||
0.817584812641143800f, 0.831469595432281490f, 0.844853579998016360f,
|
||||
0.857728600502014160f, 0.870086967945098880f, 0.881921291351318360f,
|
||||
0.893224298954010010f, 0.903989315032958980f,
|
||||
0.914209783077239990f, 0.923879504203796390f, 0.932992815971374510f,
|
||||
0.941544055938720700f, 0.949528157711029050f, 0.956940352916717530f,
|
||||
0.963776051998138430f, 0.970031261444091800f,
|
||||
0.975702106952667240f, 0.980785250663757320f, 0.985277652740478520f,
|
||||
0.989176511764526370f, 0.992479562759399410f, 0.995184719562530520f,
|
||||
0.997290432453155520f, 0.998795449733734130f,
|
||||
0.999698817729949950f, 1.000000000000000000f, 0.999698817729949950f,
|
||||
0.998795449733734130f, 0.997290432453155520f, 0.995184719562530520f,
|
||||
0.992479562759399410f, 0.989176511764526370f,
|
||||
0.985277652740478520f, 0.980785250663757320f, 0.975702106952667240f,
|
||||
0.970031261444091800f, 0.963776051998138430f, 0.956940352916717530f,
|
||||
0.949528157711029050f, 0.941544055938720700f,
|
||||
0.932992815971374510f, 0.923879504203796390f, 0.914209783077239990f,
|
||||
0.903989315032958980f, 0.893224298954010010f, 0.881921291351318360f,
|
||||
0.870086967945098880f, 0.857728600502014160f,
|
||||
0.844853579998016360f, 0.831469595432281490f, 0.817584812641143800f,
|
||||
0.803207516670227050f, 0.788346409797668460f, 0.773010432720184330f,
|
||||
0.757208824157714840f, 0.740951120853424070f,
|
||||
0.724247097969055180f, 0.707106769084930420f, 0.689540565013885500f,
|
||||
0.671558976173400880f, 0.653172850608825680f, 0.634393274784088130f,
|
||||
0.615231573581695560f, 0.595699310302734380f,
|
||||
0.575808167457580570f, 0.555570244789123540f, 0.534997642040252690f,
|
||||
0.514102756977081300f, 0.492898195981979370f, 0.471396744251251220f,
|
||||
0.449611335992813110f, 0.427555084228515630f,
|
||||
0.405241310596466060f, 0.382683426141738890f, 0.359895050525665280f,
|
||||
0.336889863014221190f, 0.313681751489639280f, 0.290284663438797000f,
|
||||
0.266712754964828490f, 0.242980182170867920f,
|
||||
0.219101235270500180f, 0.195090323686599730f, 0.170961886644363400f,
|
||||
0.146730467677116390f, 0.122410677373409270f, 0.098017141222953796f,
|
||||
0.073564566671848297f, 0.049067676067352295f,
|
||||
0.024541229009628296f, 0.000000000000000122f, -0.024541229009628296f,
|
||||
-0.049067676067352295f, -0.073564566671848297f, -0.098017141222953796f,
|
||||
-0.122410677373409270f, -0.146730467677116390f,
|
||||
-0.170961886644363400f, -0.195090323686599730f, -0.219101235270500180f,
|
||||
-0.242980182170867920f, -0.266712754964828490f, -0.290284663438797000f,
|
||||
-0.313681751489639280f, -0.336889863014221190f,
|
||||
-0.359895050525665280f, -0.382683426141738890f, -0.405241310596466060f,
|
||||
-0.427555084228515630f, -0.449611335992813110f, -0.471396744251251220f,
|
||||
-0.492898195981979370f, -0.514102756977081300f,
|
||||
-0.534997642040252690f, -0.555570244789123540f, -0.575808167457580570f,
|
||||
-0.595699310302734380f, -0.615231573581695560f, -0.634393274784088130f,
|
||||
-0.653172850608825680f, -0.671558976173400880f,
|
||||
-0.689540565013885500f, -0.707106769084930420f, -0.724247097969055180f,
|
||||
-0.740951120853424070f, -0.757208824157714840f, -0.773010432720184330f,
|
||||
-0.788346409797668460f, -0.803207516670227050f,
|
||||
-0.817584812641143800f, -0.831469595432281490f, -0.844853579998016360f,
|
||||
-0.857728600502014160f, -0.870086967945098880f, -0.881921291351318360f,
|
||||
-0.893224298954010010f, -0.903989315032958980f,
|
||||
-0.914209783077239990f, -0.923879504203796390f, -0.932992815971374510f,
|
||||
-0.941544055938720700f, -0.949528157711029050f, -0.956940352916717530f,
|
||||
-0.963776051998138430f, -0.970031261444091800f,
|
||||
-0.975702106952667240f, -0.980785250663757320f, -0.985277652740478520f,
|
||||
-0.989176511764526370f, -0.992479562759399410f, -0.995184719562530520f,
|
||||
-0.997290432453155520f, -0.998795449733734130f,
|
||||
-0.999698817729949950f, -1.000000000000000000f, -0.999698817729949950f,
|
||||
-0.998795449733734130f, -0.997290432453155520f, -0.995184719562530520f,
|
||||
-0.992479562759399410f, -0.989176511764526370f,
|
||||
-0.985277652740478520f, -0.980785250663757320f, -0.975702106952667240f,
|
||||
-0.970031261444091800f, -0.963776051998138430f, -0.956940352916717530f,
|
||||
-0.949528157711029050f, -0.941544055938720700f,
|
||||
-0.932992815971374510f, -0.923879504203796390f, -0.914209783077239990f,
|
||||
-0.903989315032958980f, -0.893224298954010010f, -0.881921291351318360f,
|
||||
-0.870086967945098880f, -0.857728600502014160f,
|
||||
-0.844853579998016360f, -0.831469595432281490f, -0.817584812641143800f,
|
||||
-0.803207516670227050f, -0.788346409797668460f, -0.773010432720184330f,
|
||||
-0.757208824157714840f, -0.740951120853424070f,
|
||||
-0.724247097969055180f, -0.707106769084930420f, -0.689540565013885500f,
|
||||
-0.671558976173400880f, -0.653172850608825680f, -0.634393274784088130f,
|
||||
-0.615231573581695560f, -0.595699310302734380f,
|
||||
-0.575808167457580570f, -0.555570244789123540f, -0.534997642040252690f,
|
||||
-0.514102756977081300f, -0.492898195981979370f, -0.471396744251251220f,
|
||||
-0.449611335992813110f, -0.427555084228515630f,
|
||||
-0.405241310596466060f, -0.382683426141738890f, -0.359895050525665280f,
|
||||
-0.336889863014221190f, -0.313681751489639280f, -0.290284663438797000f,
|
||||
-0.266712754964828490f, -0.242980182170867920f,
|
||||
-0.219101235270500180f, -0.195090323686599730f, -0.170961886644363400f,
|
||||
-0.146730467677116390f, -0.122410677373409270f, -0.098017141222953796f,
|
||||
-0.073564566671848297f, -0.049067676067352295f,
|
||||
-0.024541229009628296f, -0.000000000000000245f, 0.024541229009628296f
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Fast approximation to the trigonometric sine function for floating-point data.
|
||||
* @param[in] x input value in radians.
|
||||
* @return sin(x).
|
||||
*/
|
||||
|
||||
float32_t arm_sin_f32(
|
||||
float32_t x)
|
||||
{
|
||||
float32_t sinVal, fract, in; /* Temporary variables for input, output */
|
||||
int32_t index; /* Index variable */
|
||||
uint32_t tableSize = (uint32_t) TABLE_SIZE; /* Initialise tablesize */
|
||||
float32_t wa, wb, wc, wd; /* Cubic interpolation coefficients */
|
||||
float32_t a, b, c, d; /* Four nearest output values */
|
||||
float32_t *tablePtr; /* Pointer to table */
|
||||
int32_t n;
|
||||
float32_t fractsq, fractby2, fractby6, fractby3, fractsqby2;
|
||||
float32_t oneminusfractby2;
|
||||
float32_t frby2xfrsq, frby6xfrsq;
|
||||
|
||||
/* input x is in radians */
|
||||
/* Scale the input to [0 1] range from [0 2*PI] , divide input by 2*pi */
|
||||
in = x * 0.159154943092f;
|
||||
|
||||
/* Calculation of floor value of input */
|
||||
n = (int32_t) in;
|
||||
|
||||
/* Make negative values towards -infinity */
|
||||
if(x < 0.0f)
|
||||
{
|
||||
n = n - 1;
|
||||
}
|
||||
|
||||
/* Map input value to [0 1] */
|
||||
in = in - (float32_t) n;
|
||||
|
||||
/* Calculation of index of the table */
|
||||
index = (uint32_t) (tableSize * in);
|
||||
|
||||
/* fractional value calculation */
|
||||
fract = ((float32_t) tableSize * in) - (float32_t) index;
|
||||
|
||||
/* Checking min and max index of table */
|
||||
if(index < 0)
|
||||
{
|
||||
index = 0;
|
||||
}
|
||||
else if(index > 256)
|
||||
{
|
||||
index = 256;
|
||||
}
|
||||
|
||||
/* Initialise table pointer */
|
||||
tablePtr = (float32_t *) & sinTable[index];
|
||||
|
||||
/* Read four nearest values of input value from the sin table */
|
||||
a = tablePtr[0];
|
||||
b = tablePtr[1];
|
||||
c = tablePtr[2];
|
||||
d = tablePtr[3];
|
||||
|
||||
/* Cubic interpolation process */
|
||||
fractsq = fract * fract;
|
||||
fractby2 = fract * 0.5f;
|
||||
fractby6 = fract * 0.166666667f;
|
||||
fractby3 = fract * 0.3333333333333f;
|
||||
fractsqby2 = fractsq * 0.5f;
|
||||
frby2xfrsq = (fractby2) * fractsq;
|
||||
frby6xfrsq = (fractby6) * fractsq;
|
||||
oneminusfractby2 = 1.0f - fractby2;
|
||||
wb = fractsqby2 - fractby3;
|
||||
wc = (fractsqby2 + fract);
|
||||
wa = wb - frby6xfrsq;
|
||||
wb = frby2xfrsq - fractsq;
|
||||
sinVal = wa * a;
|
||||
wc = wc - frby2xfrsq;
|
||||
wd = (frby6xfrsq) - fractby6;
|
||||
wb = wb + oneminusfractby2;
|
||||
|
||||
/* Calculate sin value */
|
||||
sinVal = (sinVal + (b * wb)) + ((c * wc) + (d * wd));
|
||||
|
||||
/* Return the output value */
|
||||
return (sinVal);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of sin group
|
||||
*/
|
|
@ -1,208 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sin_q15.c
|
||||
*
|
||||
* Description: Fast sine calculation for Q15 values.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFastMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup sin
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* \par
|
||||
* Example code for Generation of Q15 Sin Table:
|
||||
* \par
|
||||
* <pre>tableSize = 256;
|
||||
* for(n = -1; n < (tableSize + 1); n++)
|
||||
* {
|
||||
* sinTable[n+1]=sin(2*pi*n/tableSize);
|
||||
* } </pre>
|
||||
* where pi value is 3.14159265358979
|
||||
* \par
|
||||
* Convert Floating point to Q15(Fixed point):
|
||||
* (sinTable[i] * pow(2, 15))
|
||||
* \par
|
||||
* rounding to nearest integer is done
|
||||
* sinTable[i] += (sinTable[i] > 0 ? 0.5 :-0.5);
|
||||
*/
|
||||
|
||||
|
||||
static const q15_t sinTableQ15[259] = {
|
||||
0xfcdc, 0x0, 0x324, 0x648, 0x96b, 0xc8c, 0xfab, 0x12c8,
|
||||
0x15e2, 0x18f9, 0x1c0c, 0x1f1a, 0x2224, 0x2528, 0x2827, 0x2b1f,
|
||||
0x2e11, 0x30fc, 0x33df, 0x36ba, 0x398d, 0x3c57, 0x3f17, 0x41ce,
|
||||
0x447b, 0x471d, 0x49b4, 0x4c40, 0x4ec0, 0x5134, 0x539b, 0x55f6,
|
||||
0x5843, 0x5a82, 0x5cb4, 0x5ed7, 0x60ec, 0x62f2, 0x64e9, 0x66d0,
|
||||
0x68a7, 0x6a6e, 0x6c24, 0x6dca, 0x6f5f, 0x70e3, 0x7255, 0x73b6,
|
||||
0x7505, 0x7642, 0x776c, 0x7885, 0x798a, 0x7a7d, 0x7b5d, 0x7c2a,
|
||||
0x7ce4, 0x7d8a, 0x7e1e, 0x7e9d, 0x7f0a, 0x7f62, 0x7fa7, 0x7fd9,
|
||||
0x7ff6, 0x7fff, 0x7ff6, 0x7fd9, 0x7fa7, 0x7f62, 0x7f0a, 0x7e9d,
|
||||
0x7e1e, 0x7d8a, 0x7ce4, 0x7c2a, 0x7b5d, 0x7a7d, 0x798a, 0x7885,
|
||||
0x776c, 0x7642, 0x7505, 0x73b6, 0x7255, 0x70e3, 0x6f5f, 0x6dca,
|
||||
0x6c24, 0x6a6e, 0x68a7, 0x66d0, 0x64e9, 0x62f2, 0x60ec, 0x5ed7,
|
||||
0x5cb4, 0x5a82, 0x5843, 0x55f6, 0x539b, 0x5134, 0x4ec0, 0x4c40,
|
||||
0x49b4, 0x471d, 0x447b, 0x41ce, 0x3f17, 0x3c57, 0x398d, 0x36ba,
|
||||
0x33df, 0x30fc, 0x2e11, 0x2b1f, 0x2827, 0x2528, 0x2224, 0x1f1a,
|
||||
0x1c0c, 0x18f9, 0x15e2, 0x12c8, 0xfab, 0xc8c, 0x96b, 0x648,
|
||||
0x324, 0x0, 0xfcdc, 0xf9b8, 0xf695, 0xf374, 0xf055, 0xed38,
|
||||
0xea1e, 0xe707, 0xe3f4, 0xe0e6, 0xdddc, 0xdad8, 0xd7d9, 0xd4e1,
|
||||
0xd1ef, 0xcf04, 0xcc21, 0xc946, 0xc673, 0xc3a9, 0xc0e9, 0xbe32,
|
||||
0xbb85, 0xb8e3, 0xb64c, 0xb3c0, 0xb140, 0xaecc, 0xac65, 0xaa0a,
|
||||
0xa7bd, 0xa57e, 0xa34c, 0xa129, 0x9f14, 0x9d0e, 0x9b17, 0x9930,
|
||||
0x9759, 0x9592, 0x93dc, 0x9236, 0x90a1, 0x8f1d, 0x8dab, 0x8c4a,
|
||||
0x8afb, 0x89be, 0x8894, 0x877b, 0x8676, 0x8583, 0x84a3, 0x83d6,
|
||||
0x831c, 0x8276, 0x81e2, 0x8163, 0x80f6, 0x809e, 0x8059, 0x8027,
|
||||
0x800a, 0x8000, 0x800a, 0x8027, 0x8059, 0x809e, 0x80f6, 0x8163,
|
||||
0x81e2, 0x8276, 0x831c, 0x83d6, 0x84a3, 0x8583, 0x8676, 0x877b,
|
||||
0x8894, 0x89be, 0x8afb, 0x8c4a, 0x8dab, 0x8f1d, 0x90a1, 0x9236,
|
||||
0x93dc, 0x9592, 0x9759, 0x9930, 0x9b17, 0x9d0e, 0x9f14, 0xa129,
|
||||
0xa34c, 0xa57e, 0xa7bd, 0xaa0a, 0xac65, 0xaecc, 0xb140, 0xb3c0,
|
||||
0xb64c, 0xb8e3, 0xbb85, 0xbe32, 0xc0e9, 0xc3a9, 0xc673, 0xc946,
|
||||
0xcc21, 0xcf04, 0xd1ef, 0xd4e1, 0xd7d9, 0xdad8, 0xdddc, 0xe0e6,
|
||||
0xe3f4, 0xe707, 0xea1e, 0xed38, 0xf055, 0xf374, 0xf695, 0xf9b8,
|
||||
0xfcdc, 0x0, 0x324
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Fast approximation to the trigonometric sine function for Q15 data.
|
||||
* @param[in] x Scaled input value in radians.
|
||||
* @return sin(x).
|
||||
*
|
||||
* The Q15 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*pi), Here range excludes 2*pi.
|
||||
*/
|
||||
|
||||
q15_t arm_sin_q15(
|
||||
q15_t x)
|
||||
{
|
||||
q31_t sinVal; /* Temporary variables output */
|
||||
q15_t *tablePtr; /* Pointer to table */
|
||||
q15_t fract, in, in2; /* Temporary variables for input, output */
|
||||
q31_t wa, wb, wc, wd; /* Cubic interpolation coefficients */
|
||||
q15_t a, b, c, d; /* Four nearest output values */
|
||||
q15_t fractCube, fractSquare; /* Temporary values for fractional value */
|
||||
q15_t oneBy6 = 0x1555; /* Fixed point value of 1/6 */
|
||||
q15_t tableSpacing = TABLE_SPACING_Q15; /* Table spacing */
|
||||
int32_t index; /* Index variable */
|
||||
|
||||
in = x;
|
||||
|
||||
/* Calculate the nearest index */
|
||||
index = (int32_t) in / tableSpacing;
|
||||
|
||||
/* Calculate the nearest value of input */
|
||||
in2 = (q15_t) ((index) * tableSpacing);
|
||||
|
||||
/* Calculation of fractional value */
|
||||
fract = (in - in2) << 8;
|
||||
|
||||
/* fractSquare = fract * fract */
|
||||
fractSquare = (q15_t) ((fract * fract) >> 15);
|
||||
|
||||
/* fractCube = fract * fract * fract */
|
||||
fractCube = (q15_t) ((fractSquare * fract) >> 15);
|
||||
|
||||
/* Checking min and max index of table */
|
||||
if(index < 0)
|
||||
{
|
||||
index = 0;
|
||||
}
|
||||
else if(index > 256)
|
||||
{
|
||||
index = 256;
|
||||
}
|
||||
|
||||
/* Initialise table pointer */
|
||||
tablePtr = (q15_t *) & sinTableQ15[index];
|
||||
|
||||
/* Cubic interpolation process */
|
||||
/* Calculation of wa */
|
||||
/* wa = -(oneBy6)*fractCube + (fractSquare >> 1u) - (0x2AAA)*fract; */
|
||||
wa = (q31_t) oneBy6 *fractCube;
|
||||
wa += (q31_t) 0x2AAA *fract;
|
||||
wa = -(wa >> 15);
|
||||
wa += ((q31_t) fractSquare >> 1u);
|
||||
|
||||
/* Read first nearest value of output from the sin table */
|
||||
a = *tablePtr++;
|
||||
|
||||
/* sinVal = a * wa */
|
||||
sinVal = a * wa;
|
||||
|
||||
/* Calculation of wb */
|
||||
wb = (((q31_t) fractCube >> 1u) - (q31_t) fractSquare) -
|
||||
(((q31_t) fract >> 1u) - 0x7FFF);
|
||||
|
||||
/* Read second nearest value of output from the sin table */
|
||||
b = *tablePtr++;
|
||||
|
||||
/* sinVal += b*wb */
|
||||
sinVal += b * wb;
|
||||
|
||||
|
||||
/* Calculation of wc */
|
||||
wc = -(q31_t) fractCube + fractSquare;
|
||||
wc = (wc >> 1u) + fract;
|
||||
|
||||
/* Read third nearest value of output from the sin table */
|
||||
c = *tablePtr++;
|
||||
|
||||
/* sinVal += c*wc */
|
||||
sinVal += c * wc;
|
||||
|
||||
/* Calculation of wd */
|
||||
/* wd = (oneBy6)*fractCube - (oneBy6)*fract; */
|
||||
fractCube = fractCube - fract;
|
||||
wd = ((q15_t) (((q31_t) oneBy6 * fractCube) >> 15));
|
||||
|
||||
/* Read fourth nearest value of output from the sin table */
|
||||
d = *tablePtr++;
|
||||
|
||||
/* sinVal += d*wd; */
|
||||
sinVal += d * wd;
|
||||
|
||||
/* Convert output value in 1.15(q15) format and saturate */
|
||||
sinVal = __SSAT((sinVal >> 15), 16);
|
||||
|
||||
/* Return the output value in 1.15(q15) format */
|
||||
return ((q15_t) sinVal);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of sin group
|
||||
*/
|
|
@ -1,240 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sin_q31.c
|
||||
*
|
||||
* Description: Fast sine calculation for Q31 values.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFastMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup sin
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* \par
|
||||
* Tables generated are in Q31(1.31 Fixed point format)
|
||||
* Generation of sin values in floating point:
|
||||
* <pre>tableSize = 256;
|
||||
* for(n = -1; n < (tableSize + 1); n++)
|
||||
* {
|
||||
* sinTable[n+1]= sin(2*pi*n/tableSize);
|
||||
* } </pre>
|
||||
* where pi value is 3.14159265358979
|
||||
* \par
|
||||
* Convert Floating point to Q31(Fixed point):
|
||||
* (sinTable[i] * pow(2, 31))
|
||||
* \par
|
||||
* rounding to nearest integer is done
|
||||
* sinTable[i] += (sinTable[i] > 0 ? 0.5 :-0.5);
|
||||
*/
|
||||
|
||||
static const q31_t sinTableQ31[259] = {
|
||||
0xfcdbd541, 0x0, 0x3242abf, 0x647d97c, 0x96a9049, 0xc8bd35e, 0xfab272b,
|
||||
0x12c8106f,
|
||||
0x15e21445, 0x18f8b83c, 0x1c0b826a, 0x1f19f97b, 0x2223a4c5, 0x25280c5e,
|
||||
0x2826b928, 0x2b1f34eb,
|
||||
0x2e110a62, 0x30fbc54d, 0x33def287, 0x36ba2014, 0x398cdd32, 0x3c56ba70,
|
||||
0x3f1749b8, 0x41ce1e65,
|
||||
0x447acd50, 0x471cece7, 0x49b41533, 0x4c3fdff4, 0x4ebfe8a5, 0x5133cc94,
|
||||
0x539b2af0, 0x55f5a4d2,
|
||||
0x5842dd54, 0x5a82799a, 0x5cb420e0, 0x5ed77c8a, 0x60ec3830, 0x62f201ac,
|
||||
0x64e88926, 0x66cf8120,
|
||||
0x68a69e81, 0x6a6d98a4, 0x6c242960, 0x6dca0d14, 0x6f5f02b2, 0x70e2cbc6,
|
||||
0x72552c85, 0x73b5ebd1,
|
||||
0x7504d345, 0x7641af3d, 0x776c4edb, 0x78848414, 0x798a23b1, 0x7a7d055b,
|
||||
0x7b5d039e, 0x7c29fbee,
|
||||
0x7ce3ceb2, 0x7d8a5f40, 0x7e1d93ea, 0x7e9d55fc, 0x7f0991c4, 0x7f62368f,
|
||||
0x7fa736b4, 0x7fd8878e,
|
||||
0x7ff62182, 0x7fffffff, 0x7ff62182, 0x7fd8878e, 0x7fa736b4, 0x7f62368f,
|
||||
0x7f0991c4, 0x7e9d55fc,
|
||||
0x7e1d93ea, 0x7d8a5f40, 0x7ce3ceb2, 0x7c29fbee, 0x7b5d039e, 0x7a7d055b,
|
||||
0x798a23b1, 0x78848414,
|
||||
0x776c4edb, 0x7641af3d, 0x7504d345, 0x73b5ebd1, 0x72552c85, 0x70e2cbc6,
|
||||
0x6f5f02b2, 0x6dca0d14,
|
||||
0x6c242960, 0x6a6d98a4, 0x68a69e81, 0x66cf8120, 0x64e88926, 0x62f201ac,
|
||||
0x60ec3830, 0x5ed77c8a,
|
||||
0x5cb420e0, 0x5a82799a, 0x5842dd54, 0x55f5a4d2, 0x539b2af0, 0x5133cc94,
|
||||
0x4ebfe8a5, 0x4c3fdff4,
|
||||
0x49b41533, 0x471cece7, 0x447acd50, 0x41ce1e65, 0x3f1749b8, 0x3c56ba70,
|
||||
0x398cdd32, 0x36ba2014,
|
||||
0x33def287, 0x30fbc54d, 0x2e110a62, 0x2b1f34eb, 0x2826b928, 0x25280c5e,
|
||||
0x2223a4c5, 0x1f19f97b,
|
||||
0x1c0b826a, 0x18f8b83c, 0x15e21445, 0x12c8106f, 0xfab272b, 0xc8bd35e,
|
||||
0x96a9049, 0x647d97c,
|
||||
0x3242abf, 0x0, 0xfcdbd541, 0xf9b82684, 0xf6956fb7, 0xf3742ca2, 0xf054d8d5,
|
||||
0xed37ef91,
|
||||
0xea1debbb, 0xe70747c4, 0xe3f47d96, 0xe0e60685, 0xdddc5b3b, 0xdad7f3a2,
|
||||
0xd7d946d8, 0xd4e0cb15,
|
||||
0xd1eef59e, 0xcf043ab3, 0xcc210d79, 0xc945dfec, 0xc67322ce, 0xc3a94590,
|
||||
0xc0e8b648, 0xbe31e19b,
|
||||
0xbb8532b0, 0xb8e31319, 0xb64beacd, 0xb3c0200c, 0xb140175b, 0xaecc336c,
|
||||
0xac64d510, 0xaa0a5b2e,
|
||||
0xa7bd22ac, 0xa57d8666, 0xa34bdf20, 0xa1288376, 0x9f13c7d0, 0x9d0dfe54,
|
||||
0x9b1776da, 0x99307ee0,
|
||||
0x9759617f, 0x9592675c, 0x93dbd6a0, 0x9235f2ec, 0x90a0fd4e, 0x8f1d343a,
|
||||
0x8daad37b, 0x8c4a142f,
|
||||
0x8afb2cbb, 0x89be50c3, 0x8893b125, 0x877b7bec, 0x8675dc4f, 0x8582faa5,
|
||||
0x84a2fc62, 0x83d60412,
|
||||
0x831c314e, 0x8275a0c0, 0x81e26c16, 0x8162aa04, 0x80f66e3c, 0x809dc971,
|
||||
0x8058c94c, 0x80277872,
|
||||
0x8009de7e, 0x80000000, 0x8009de7e, 0x80277872, 0x8058c94c, 0x809dc971,
|
||||
0x80f66e3c, 0x8162aa04,
|
||||
0x81e26c16, 0x8275a0c0, 0x831c314e, 0x83d60412, 0x84a2fc62, 0x8582faa5,
|
||||
0x8675dc4f, 0x877b7bec,
|
||||
0x8893b125, 0x89be50c3, 0x8afb2cbb, 0x8c4a142f, 0x8daad37b, 0x8f1d343a,
|
||||
0x90a0fd4e, 0x9235f2ec,
|
||||
0x93dbd6a0, 0x9592675c, 0x9759617f, 0x99307ee0, 0x9b1776da, 0x9d0dfe54,
|
||||
0x9f13c7d0, 0xa1288376,
|
||||
0xa34bdf20, 0xa57d8666, 0xa7bd22ac, 0xaa0a5b2e, 0xac64d510, 0xaecc336c,
|
||||
0xb140175b, 0xb3c0200c,
|
||||
0xb64beacd, 0xb8e31319, 0xbb8532b0, 0xbe31e19b, 0xc0e8b648, 0xc3a94590,
|
||||
0xc67322ce, 0xc945dfec,
|
||||
0xcc210d79, 0xcf043ab3, 0xd1eef59e, 0xd4e0cb15, 0xd7d946d8, 0xdad7f3a2,
|
||||
0xdddc5b3b, 0xe0e60685,
|
||||
0xe3f47d96, 0xe70747c4, 0xea1debbb, 0xed37ef91, 0xf054d8d5, 0xf3742ca2,
|
||||
0xf6956fb7, 0xf9b82684,
|
||||
0xfcdbd541, 0x0, 0x3242abf
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Fast approximation to the trigonometric sine function for Q31 data.
|
||||
* @param[in] x Scaled input value in radians.
|
||||
* @return sin(x).
|
||||
*
|
||||
* The Q31 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*pi), Here range excludes 2*pi.
|
||||
*/
|
||||
|
||||
q31_t arm_sin_q31(
|
||||
q31_t x)
|
||||
{
|
||||
q31_t sinVal, in, in2; /* Temporary variables for input, output */
|
||||
int32_t index; /* Index variables */
|
||||
q31_t wa, wb, wc, wd; /* Cubic interpolation coefficients */
|
||||
q31_t a, b, c, d; /* Four nearest output values */
|
||||
q31_t *tablePtr; /* Pointer to table */
|
||||
q31_t fract, fractCube, fractSquare; /* Temporary values for fractional values */
|
||||
q31_t oneBy6 = 0x15555555; /* Fixed point value of 1/6 */
|
||||
q31_t tableSpacing = TABLE_SPACING_Q31; /* Table spacing */
|
||||
q31_t temp; /* Temporary variable for intermediate process */
|
||||
|
||||
in = x;
|
||||
|
||||
/* Calculate the nearest index */
|
||||
index = (uint32_t) in / (uint32_t) tableSpacing;
|
||||
|
||||
/* Calculate the nearest value of input */
|
||||
in2 = (q31_t) index *tableSpacing;
|
||||
|
||||
/* Calculation of fractional value */
|
||||
fract = (in - in2) << 8;
|
||||
|
||||
/* fractSquare = fract * fract */
|
||||
fractSquare = ((q31_t) (((q63_t) fract * fract) >> 32));
|
||||
fractSquare = fractSquare << 1;
|
||||
|
||||
/* fractCube = fract * fract * fract */
|
||||
fractCube = ((q31_t) (((q63_t) fractSquare * fract) >> 32));
|
||||
fractCube = fractCube << 1;
|
||||
|
||||
/* Checking min and max index of table */
|
||||
if(index < 0)
|
||||
{
|
||||
index = 0;
|
||||
}
|
||||
else if(index > 256)
|
||||
{
|
||||
index = 256;
|
||||
}
|
||||
|
||||
/* Initialise table pointer */
|
||||
tablePtr = (q31_t *) & sinTableQ31[index];
|
||||
|
||||
/* Cubic interpolation process */
|
||||
/* Calculation of wa */
|
||||
/* wa = -(oneBy6)*fractCube + (fractSquare >> 1u) - (0x2AAAAAAA)*fract; */
|
||||
wa = ((q31_t) (((q63_t) oneBy6 * fractCube) >> 32));
|
||||
temp = 0x2AAAAAAA;
|
||||
wa = (q31_t) ((((q63_t) wa << 32) + ((q63_t) temp * fract)) >> 32);
|
||||
wa = -(wa << 1u);
|
||||
wa += (fractSquare >> 1u);
|
||||
|
||||
/* Read first nearest value of output from the sin table */
|
||||
a = *tablePtr++;
|
||||
|
||||
/* sinVal = a*wa */
|
||||
sinVal = ((q31_t) (((q63_t) a * wa) >> 32));
|
||||
|
||||
/* q31(1.31) Fixed point value of 1 */
|
||||
temp = 0x7FFFFFFF;
|
||||
|
||||
/* Calculation of wb */
|
||||
wb = ((fractCube >> 1u) - (fractSquare + (fract >> 1u))) + temp;
|
||||
|
||||
/* Read second nearest value of output from the sin table */
|
||||
b = *tablePtr++;
|
||||
|
||||
/* sinVal += b*wb */
|
||||
sinVal = (q31_t) ((((q63_t) sinVal << 32) + (q63_t) b * (wb)) >> 32);
|
||||
|
||||
/* Calculation of wc */
|
||||
wc = -fractCube + fractSquare;
|
||||
wc = (wc >> 1u) + fract;
|
||||
|
||||
/* Read third nearest value of output from the sin table */
|
||||
c = *tablePtr++;
|
||||
|
||||
/* sinVal += c*wc */
|
||||
sinVal = (q31_t) ((((q63_t) sinVal << 32) + ((q63_t) c * wc)) >> 32);
|
||||
|
||||
/* Calculation of wd */
|
||||
/* wd = (oneBy6) * fractCube - (oneBy6) * fract; */
|
||||
fractCube = fractCube - fract;
|
||||
wd = ((q31_t) (((q63_t) oneBy6 * fractCube) >> 32));
|
||||
wd = (wd << 1u);
|
||||
|
||||
/* Read fourth nearest value of output from the sin table */
|
||||
d = *tablePtr++;
|
||||
|
||||
/* sinVal += d*wd; */
|
||||
sinVal = (q31_t) ((((q63_t) sinVal << 32) + ((q63_t) d * wd)) >> 32);
|
||||
|
||||
/* convert sinVal in 2.30 format to 1.31 format */
|
||||
return (__QADD(sinVal, sinVal));
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of sin group
|
||||
*/
|
|
@ -1,131 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2011 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sqrt_q15.c
|
||||
*
|
||||
* Description: Q15 square root function.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.0 2011/03/08
|
||||
* Alpha release.
|
||||
*
|
||||
* Version 1.0.1 2011/09/30
|
||||
* Beta release.
|
||||
*
|
||||
* -------------------------------------------------------------------- */
|
||||
#include "arm_math.h"
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
|
||||
/**
|
||||
* @ingroup groupFastMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup SQRT
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q15 square root function.
|
||||
* @param[in] in input value. The range of the input value is [0 +1) or 0x0000 to 0x7FFF.
|
||||
* @param[out] *pOut square root of input value.
|
||||
* @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
|
||||
* <code>in</code> is negative value and returns zero output for negative values.
|
||||
*/
|
||||
|
||||
arm_status arm_sqrt_q15(
|
||||
q15_t in,
|
||||
q15_t * pOut)
|
||||
{
|
||||
q15_t number, temp1, var1, signBits1, half;
|
||||
q31_t bits_val1;
|
||||
float32_t temp_float1;
|
||||
|
||||
number = in;
|
||||
|
||||
/* If the input is a positive number then compute the signBits. */
|
||||
if(number > 0)
|
||||
{
|
||||
signBits1 = __CLZ(number) - 17;
|
||||
|
||||
/* Shift by the number of signBits1 */
|
||||
if((signBits1 % 2) == 0)
|
||||
{
|
||||
number = number << signBits1;
|
||||
}
|
||||
else
|
||||
{
|
||||
number = number << (signBits1 - 1);
|
||||
}
|
||||
|
||||
/* Calculate half value of the number */
|
||||
half = number >> 1;
|
||||
/* Store the number for later use */
|
||||
temp1 = number;
|
||||
|
||||
/*Convert to float */
|
||||
temp_float1 = number * 3.051757812500000e-005f;
|
||||
/*Store as integer */
|
||||
bits_val1 = *(int *) &temp_float1;
|
||||
/* Subtract the shifted value from the magic number to give intial guess */
|
||||
bits_val1 = 0x5f3759df - (bits_val1 >> 1); // gives initial guess
|
||||
/* Store as float */
|
||||
temp_float1 = *(float *) &bits_val1;
|
||||
/* Convert to integer format */
|
||||
var1 = (q31_t) (temp_float1 * 16384);
|
||||
|
||||
/* 1st iteration */
|
||||
var1 = ((q15_t) ((q31_t) var1 * (0x3000 -
|
||||
((q15_t)
|
||||
((((q15_t)
|
||||
(((q31_t) var1 * var1) >> 15)) *
|
||||
(q31_t) half) >> 15))) >> 15)) << 2;
|
||||
/* 2nd iteration */
|
||||
var1 = ((q15_t) ((q31_t) var1 * (0x3000 -
|
||||
((q15_t)
|
||||
((((q15_t)
|
||||
(((q31_t) var1 * var1) >> 15)) *
|
||||
(q31_t) half) >> 15))) >> 15)) << 2;
|
||||
/* 3rd iteration */
|
||||
var1 = ((q15_t) ((q31_t) var1 * (0x3000 -
|
||||
((q15_t)
|
||||
((((q15_t)
|
||||
(((q31_t) var1 * var1) >> 15)) *
|
||||
(q31_t) half) >> 15))) >> 15)) << 2;
|
||||
|
||||
/* Multiply the inverse square root with the original value */
|
||||
var1 = ((q15_t) (((q31_t) temp1 * var1) >> 15)) << 1;
|
||||
|
||||
/* Shift the output down accordingly */
|
||||
if((signBits1 % 2) == 0)
|
||||
{
|
||||
var1 = var1 >> (signBits1 / 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
var1 = var1 >> ((signBits1 - 1) / 2);
|
||||
}
|
||||
*pOut = var1;
|
||||
|
||||
return (ARM_MATH_SUCCESS);
|
||||
}
|
||||
/* If the number is a negative number then store zero as its square root value */
|
||||
else
|
||||
{
|
||||
*pOut = 0;
|
||||
return (ARM_MATH_ARGUMENT_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of SQRT group
|
||||
*/
|
|
@ -1,129 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2011 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sqrt_q31.c
|
||||
*
|
||||
* Description: Q31 square root function.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.0 2011/03/08
|
||||
* Alpha release.
|
||||
*
|
||||
* Version 1.0.1 2011/09/30
|
||||
* Beta release.
|
||||
*
|
||||
* -------------------------------------------------------------------- */
|
||||
#include "arm_math.h"
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFastMath
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup SQRT
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Q31 square root function.
|
||||
* @param[in] in input value. The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF.
|
||||
* @param[out] *pOut square root of input value.
|
||||
* @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
|
||||
* <code>in</code> is negative value and returns zero output for negative values.
|
||||
*/
|
||||
|
||||
arm_status arm_sqrt_q31(
|
||||
q31_t in,
|
||||
q31_t * pOut)
|
||||
{
|
||||
q31_t number, temp1, bits_val1, var1, signBits1, half;
|
||||
float32_t temp_float1;
|
||||
|
||||
number = in;
|
||||
|
||||
/* If the input is a positive number then compute the signBits. */
|
||||
if(number > 0)
|
||||
{
|
||||
signBits1 = __CLZ(number) - 1;
|
||||
|
||||
/* Shift by the number of signBits1 */
|
||||
if((signBits1 % 2) == 0)
|
||||
{
|
||||
number = number << signBits1;
|
||||
}
|
||||
else
|
||||
{
|
||||
number = number << (signBits1 - 1);
|
||||
}
|
||||
|
||||
/* Calculate half value of the number */
|
||||
half = number >> 1;
|
||||
/* Store the number for later use */
|
||||
temp1 = number;
|
||||
|
||||
/*Convert to float */
|
||||
temp_float1 = number * 4.6566128731e-010f;
|
||||
/*Store as integer */
|
||||
bits_val1 = *(int *) &temp_float1;
|
||||
/* Subtract the shifted value from the magic number to give intial guess */
|
||||
bits_val1 = 0x5f3759df - (bits_val1 >> 1); // gives initial guess
|
||||
/* Store as float */
|
||||
temp_float1 = *(float *) &bits_val1;
|
||||
/* Convert to integer format */
|
||||
var1 = (q31_t) (temp_float1 * 1073741824);
|
||||
|
||||
/* 1st iteration */
|
||||
var1 = ((q31_t) ((q63_t) var1 * (0x30000000 -
|
||||
((q31_t)
|
||||
((((q31_t)
|
||||
(((q63_t) var1 * var1) >> 31)) *
|
||||
(q63_t) half) >> 31))) >> 31)) << 2;
|
||||
/* 2nd iteration */
|
||||
var1 = ((q31_t) ((q63_t) var1 * (0x30000000 -
|
||||
((q31_t)
|
||||
((((q31_t)
|
||||
(((q63_t) var1 * var1) >> 31)) *
|
||||
(q63_t) half) >> 31))) >> 31)) << 2;
|
||||
/* 3rd iteration */
|
||||
var1 = ((q31_t) ((q63_t) var1 * (0x30000000 -
|
||||
((q31_t)
|
||||
((((q31_t)
|
||||
(((q63_t) var1 * var1) >> 31)) *
|
||||
(q63_t) half) >> 31))) >> 31)) << 2;
|
||||
|
||||
/* Multiply the inverse square root with the original value */
|
||||
var1 = ((q31_t) (((q63_t) temp1 * var1) >> 31)) << 1;
|
||||
|
||||
/* Shift the output down accordingly */
|
||||
if((signBits1 % 2) == 0)
|
||||
{
|
||||
var1 = var1 >> (signBits1 / 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
var1 = var1 >> ((signBits1 - 1) / 2);
|
||||
}
|
||||
*pOut = var1;
|
||||
|
||||
return (ARM_MATH_SUCCESS);
|
||||
}
|
||||
/* If the number is a negative number then store zero as its square root value */
|
||||
else
|
||||
{
|
||||
*pOut = 0;
|
||||
return (ARM_MATH_ARGUMENT_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of SQRT group
|
||||
*/
|
|
@ -1,105 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_biquad_cascade_df1_32x64_init_q31.c
|
||||
*
|
||||
* Description: High precision Q31 Biquad cascade filter initialization function.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFilters
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BiquadCascadeDF1_32x64
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @details
|
||||
*
|
||||
* @param[in,out] *S points to an instance of the high precision Q31 Biquad cascade filter structure.
|
||||
* @param[in] numStages number of 2nd order stages in the filter.
|
||||
* @param[in] *pCoeffs points to the filter coefficients.
|
||||
* @param[in] *pState points to the state buffer.
|
||||
* @param[in] postShift Shift to be applied after the accumulator. Varies according to the coefficients format.
|
||||
* @return none
|
||||
*
|
||||
* <b>Coefficient and State Ordering:</b>
|
||||
*
|
||||
* \par
|
||||
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
||||
* <pre>
|
||||
* {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
|
||||
* </pre>
|
||||
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
||||
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
||||
* and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
|
||||
*
|
||||
* \par
|
||||
* The <code>pState</code> points to state variables array and size of each state variable is 1.63 format.
|
||||
* Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
|
||||
* The state variables are arranged in the state array as:
|
||||
* <pre>
|
||||
* {x[n-1], x[n-2], y[n-1], y[n-2]}
|
||||
* </pre>
|
||||
* The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
|
||||
* The state array has a total length of <code>4*numStages</code> values.
|
||||
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
||||
*/
|
||||
|
||||
void arm_biquad_cas_df1_32x64_init_q31(
|
||||
arm_biquad_cas_df1_32x64_ins_q31 * S,
|
||||
uint8_t numStages,
|
||||
q31_t * pCoeffs,
|
||||
q63_t * pState,
|
||||
uint8_t postShift)
|
||||
{
|
||||
/* Assign filter stages */
|
||||
S->numStages = numStages;
|
||||
|
||||
/* Assign postShift to be applied to the output */
|
||||
S->postShift = postShift;
|
||||
|
||||
/* Assign coefficient pointer */
|
||||
S->pCoeffs = pCoeffs;
|
||||
|
||||
/* Clear state buffer and size is always 4 * numStages */
|
||||
memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(q63_t));
|
||||
|
||||
/* Assign state pointer */
|
||||
S->pState = pState;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BiquadCascadeDF1_32x64 group
|
||||
*/
|
|
@ -1,553 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_biquad_cascade_df1_32x64_q31.c
|
||||
*
|
||||
* Description: High precision Q31 Biquad cascade filter processing function
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFilters
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup BiquadCascadeDF1_32x64 High Precision Q31 Biquad Cascade Filter
|
||||
*
|
||||
* This function implements a high precision Biquad cascade filter which operates on
|
||||
* Q31 data values. The filter coefficients are in 1.31 format and the state variables
|
||||
* are in 1.63 format. The double precision state variables reduce quantization noise
|
||||
* in the filter and provide a cleaner output.
|
||||
* These filters are particularly useful when implementing filters in which the
|
||||
* singularities are close to the unit circle. This is common for low pass or high
|
||||
* pass filters with very low cutoff frequencies.
|
||||
*
|
||||
* The function operates on blocks of input and output data
|
||||
* and each call to the function processes <code>blockSize</code> samples through
|
||||
* the filter. <code>pSrc</code> and <code>pDst</code> points to input and output arrays
|
||||
* containing <code>blockSize</code> Q31 values.
|
||||
*
|
||||
* \par Algorithm
|
||||
* Each Biquad stage implements a second order filter using the difference equation:
|
||||
* <pre>
|
||||
* y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||
* </pre>
|
||||
* A Direct Form I algorithm is used with 5 coefficients and 4 state variables per stage.
|
||||
* \image html Biquad.gif "Single Biquad filter stage"
|
||||
* Coefficients <code>b0, b1, and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.
|
||||
* Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.
|
||||
* Pay careful attention to the sign of the feedback coefficients.
|
||||
* Some design tools use the difference equation
|
||||
* <pre>
|
||||
* y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] - a1 * y[n-1] - a2 * y[n-2]
|
||||
* </pre>
|
||||
* In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.
|
||||
*
|
||||
* \par
|
||||
* Higher order filters are realized as a cascade of second order sections.
|
||||
* <code>numStages</code> refers to the number of second order stages used.
|
||||
* For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.
|
||||
* \image html BiquadCascade.gif "8th order filter using a cascade of Biquad stages"
|
||||
* A 9th order filter would be realized with <code>numStages=5</code> second order stages with the coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).
|
||||
*
|
||||
* \par
|
||||
* The <code>pState</code> points to state variables array .
|
||||
* Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code> and each state variable in 1.63 format to improve precision.
|
||||
* The state variables are arranged in the array as:
|
||||
* <pre>
|
||||
* {x[n-1], x[n-2], y[n-1], y[n-2]}
|
||||
* </pre>
|
||||
*
|
||||
* \par
|
||||
* The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
|
||||
* The state array has a total length of <code>4*numStages</code> values of data in 1.63 format.
|
||||
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
||||
*
|
||||
* \par Instance Structure
|
||||
* The coefficients and state variables for a filter are stored together in an instance data structure.
|
||||
* A separate instance structure must be defined for each filter.
|
||||
* Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
|
||||
*
|
||||
* \par Init Function
|
||||
* There is also an associated initialization function which performs the following operations:
|
||||
* - Sets the values of the internal structure fields.
|
||||
* - Zeros out the values in the state buffer.
|
||||
* \par
|
||||
* Use of the initialization function is optional.
|
||||
* However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
|
||||
* To place an instance structure into a const data section, the instance structure must be manually initialized.
|
||||
* Set the values in the state buffer to zeros before static initialization.
|
||||
* For example, to statically initialize the filter instance structure use
|
||||
* <pre>
|
||||
* arm_biquad_cas_df1_32x64_ins_q31 S1 = {numStages, pState, pCoeffs, postShift};
|
||||
* </pre>
|
||||
* where <code>numStages</code> is the number of Biquad stages in the filter; <code>pState</code> is the address of the state buffer;
|
||||
* <code>pCoeffs</code> is the address of the coefficient buffer; <code>postShift</code> shift to be applied which is described in detail below.
|
||||
* \par Fixed-Point Behavior
|
||||
* Care must be taken while using Biquad Cascade 32x64 filter function.
|
||||
* Following issues must be considered:
|
||||
* - Scaling of coefficients
|
||||
* - Filter gain
|
||||
* - Overflow and saturation
|
||||
*
|
||||
* \par
|
||||
* Filter coefficients are represented as fractional values and
|
||||
* restricted to lie in the range <code>[-1 +1)</code>.
|
||||
* The processing function has an additional scaling parameter <code>postShift</code>
|
||||
* which allows the filter coefficients to exceed the range <code>[+1 -1)</code>.
|
||||
* At the output of the filter's accumulator is a shift register which shifts the result by <code>postShift</code> bits.
|
||||
* \image html BiquadPostshift.gif "Fixed-point Biquad with shift by postShift bits after accumulator"
|
||||
* This essentially scales the filter coefficients by <code>2^postShift</code>.
|
||||
* For example, to realize the coefficients
|
||||
* <pre>
|
||||
* {1.5, -0.8, 1.2, 1.6, -0.9}
|
||||
* </pre>
|
||||
* set the Coefficient array to:
|
||||
* <pre>
|
||||
* {0.75, -0.4, 0.6, 0.8, -0.45}
|
||||
* </pre>
|
||||
* and set <code>postShift=1</code>
|
||||
*
|
||||
* \par
|
||||
* The second thing to keep in mind is the gain through the filter.
|
||||
* The frequency response of a Biquad filter is a function of its coefficients.
|
||||
* It is possible for the gain through the filter to exceed 1.0 meaning that the filter increases the amplitude of certain frequencies.
|
||||
* This means that an input signal with amplitude < 1.0 may result in an output > 1.0 and these are saturated or overflowed based on the implementation of the filter.
|
||||
* To avoid this behavior the filter needs to be scaled down such that its peak gain < 1.0 or the input signal must be scaled down so that the combination of input and filter are never overflowed.
|
||||
*
|
||||
* \par
|
||||
* The third item to consider is the overflow and saturation behavior of the fixed-point Q31 version.
|
||||
* This is described in the function specific documentation below.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BiquadCascadeDF1_32x64
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @details
|
||||
|
||||
* @param[in] *S points to an instance of the high precision Q31 Biquad cascade filter.
|
||||
* @param[in] *pSrc points to the block of input data.
|
||||
* @param[out] *pDst points to the block of output data.
|
||||
* @param[in] blockSize number of samples to process.
|
||||
* @return none.
|
||||
*
|
||||
* \par
|
||||
* The function is implemented using an internal 64-bit accumulator.
|
||||
* The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
|
||||
* Thus, if the accumulator result overflows it wraps around rather than clip.
|
||||
* In order to avoid overflows completely the input signal must be scaled down by 2 bits and lie in the range [-0.25 +0.25).
|
||||
* After all 5 multiply-accumulates are performed, the 2.62 accumulator is shifted by <code>postShift</code> bits and the result truncated to
|
||||
* 1.31 format by discarding the low 32 bits.
|
||||
*
|
||||
* \par
|
||||
* Two related functions are provided in the CMSIS DSP library.
|
||||
* <code>arm_biquad_cascade_df1_q31()</code> implements a Biquad cascade with 32-bit coefficients and state variables with a Q63 accumulator.
|
||||
* <code>arm_biquad_cascade_df1_fast_q31()</code> implements a Biquad cascade with 32-bit coefficients and state variables with a Q31 accumulator.
|
||||
*/
|
||||
|
||||
void arm_biquad_cas_df1_32x64_q31(
|
||||
const arm_biquad_cas_df1_32x64_ins_q31 * S,
|
||||
q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
q31_t *pIn = pSrc; /* input pointer initialization */
|
||||
q31_t *pOut = pDst; /* output pointer initialization */
|
||||
q63_t *pState = S->pState; /* state pointer initialization */
|
||||
q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */
|
||||
q63_t acc; /* accumulator */
|
||||
q31_t Xn1, Xn2; /* Input Filter state variables */
|
||||
q63_t Yn1, Yn2; /* Output Filter state variables */
|
||||
q31_t b0, b1, b2, a1, a2; /* Filter coefficients */
|
||||
q31_t Xn; /* temporary input */
|
||||
int32_t shift = (int32_t) S->postShift + 1; /* Shift to be applied to the output */
|
||||
uint32_t sample, stage = S->numStages; /* loop counters */
|
||||
q31_t acc_l, acc_h; /* temporary output */
|
||||
uint32_t uShift = ((uint32_t) S->postShift + 1u);
|
||||
uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */
|
||||
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
do
|
||||
{
|
||||
/* Reading the coefficients */
|
||||
b0 = *pCoeffs++;
|
||||
b1 = *pCoeffs++;
|
||||
b2 = *pCoeffs++;
|
||||
a1 = *pCoeffs++;
|
||||
a2 = *pCoeffs++;
|
||||
|
||||
/* Reading the state values */
|
||||
Xn1 = (q31_t) (pState[0]);
|
||||
Xn2 = (q31_t) (pState[1]);
|
||||
Yn1 = pState[2];
|
||||
Yn2 = pState[3];
|
||||
|
||||
/* Apply loop unrolling and compute 4 output values simultaneously. */
|
||||
/* The variable acc hold output value that is being computed and
|
||||
* stored in the destination buffer
|
||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||
*/
|
||||
|
||||
sample = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(sample > 0u)
|
||||
{
|
||||
/* Read the input */
|
||||
Xn = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
|
||||
/* acc = b0 * x[n] */
|
||||
acc = (q63_t) Xn *b0;
|
||||
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc += (q63_t) Xn1 *b1;
|
||||
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc += (q63_t) Xn2 *b2;
|
||||
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc += mult32x64(Yn1, a1);
|
||||
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc += mult32x64(Yn2, a2);
|
||||
|
||||
/* The result is converted to 1.63 , Yn2 variable is reused */
|
||||
Yn2 = acc << shift;
|
||||
|
||||
/* Calc lower part of acc */
|
||||
acc_l = acc & 0xffffffff;
|
||||
|
||||
/* Calc upper part of acc */
|
||||
acc_h = (acc >> 32) & 0xffffffff;
|
||||
|
||||
/* Apply shift for lower part of acc and upper part of acc */
|
||||
acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||
|
||||
/* Store the output in the destination buffer in 1.31 format. */
|
||||
*pOut = acc_h;
|
||||
|
||||
/* Read the second input into Xn2, to reuse the value */
|
||||
Xn2 = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc = (q63_t) Xn *b1;
|
||||
|
||||
/* acc = b0 * x[n] */
|
||||
acc += (q63_t) Xn2 *b0;
|
||||
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc += (q63_t) Xn1 *b2;
|
||||
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc += mult32x64(Yn2, a1);
|
||||
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc += mult32x64(Yn1, a2);
|
||||
|
||||
/* The result is converted to 1.63, Yn1 variable is reused */
|
||||
Yn1 = acc << shift;
|
||||
|
||||
/* Calc lower part of acc */
|
||||
acc_l = acc & 0xffffffff;
|
||||
|
||||
/* Calc upper part of acc */
|
||||
acc_h = (acc >> 32) & 0xffffffff;
|
||||
|
||||
/* Apply shift for lower part of acc and upper part of acc */
|
||||
acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||
|
||||
/* Read the third input into Xn1, to reuse the value */
|
||||
Xn1 = *pIn++;
|
||||
|
||||
/* The result is converted to 1.31 */
|
||||
/* Store the output in the destination buffer. */
|
||||
*(pOut + 1u) = acc_h;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
|
||||
/* acc = b0 * x[n] */
|
||||
acc = (q63_t) Xn1 *b0;
|
||||
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc += (q63_t) Xn2 *b1;
|
||||
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc += (q63_t) Xn *b2;
|
||||
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc += mult32x64(Yn1, a1);
|
||||
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc += mult32x64(Yn2, a2);
|
||||
|
||||
/* The result is converted to 1.63, Yn2 variable is reused */
|
||||
Yn2 = acc << shift;
|
||||
|
||||
/* Calc lower part of acc */
|
||||
acc_l = acc & 0xffffffff;
|
||||
|
||||
/* Calc upper part of acc */
|
||||
acc_h = (acc >> 32) & 0xffffffff;
|
||||
|
||||
/* Apply shift for lower part of acc and upper part of acc */
|
||||
acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||
|
||||
/* Store the output in the destination buffer in 1.31 format. */
|
||||
*(pOut + 2u) = acc_h;
|
||||
|
||||
/* Read the fourth input into Xn, to reuse the value */
|
||||
Xn = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
/* acc = b0 * x[n] */
|
||||
acc = (q63_t) Xn *b0;
|
||||
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc += (q63_t) Xn1 *b1;
|
||||
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc += (q63_t) Xn2 *b2;
|
||||
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc += mult32x64(Yn2, a1);
|
||||
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc += mult32x64(Yn1, a2);
|
||||
|
||||
/* The result is converted to 1.63, Yn1 variable is reused */
|
||||
Yn1 = acc << shift;
|
||||
|
||||
/* Calc lower part of acc */
|
||||
acc_l = acc & 0xffffffff;
|
||||
|
||||
/* Calc upper part of acc */
|
||||
acc_h = (acc >> 32) & 0xffffffff;
|
||||
|
||||
/* Apply shift for lower part of acc and upper part of acc */
|
||||
acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||
|
||||
/* Store the output in the destination buffer in 1.31 format. */
|
||||
*(pOut + 3u) = acc_h;
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
Xn2 = Xn1;
|
||||
Xn1 = Xn;
|
||||
|
||||
/* update output pointer */
|
||||
pOut += 4u;
|
||||
|
||||
/* decrement the loop counter */
|
||||
sample--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
sample = (blockSize & 0x3u);
|
||||
|
||||
while(sample > 0u)
|
||||
{
|
||||
/* Read the input */
|
||||
Xn = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
|
||||
/* acc = b0 * x[n] */
|
||||
acc = (q63_t) Xn *b0;
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc += (q63_t) Xn1 *b1;
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc += (q63_t) Xn2 *b2;
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc += mult32x64(Yn1, a1);
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc += mult32x64(Yn2, a2);
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
Xn2 = Xn1;
|
||||
Xn1 = Xn;
|
||||
Yn2 = Yn1;
|
||||
/* The result is converted to 1.63, Yn1 variable is reused */
|
||||
Yn1 = acc << shift;
|
||||
|
||||
/* Calc lower part of acc */
|
||||
acc_l = acc & 0xffffffff;
|
||||
|
||||
/* Calc upper part of acc */
|
||||
acc_h = (acc >> 32) & 0xffffffff;
|
||||
|
||||
/* Apply shift for lower part of acc and upper part of acc */
|
||||
acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||
|
||||
/* Store the output in the destination buffer in 1.31 format. */
|
||||
*pOut++ = acc_h;
|
||||
//Yn1 = acc << shift;
|
||||
|
||||
/* Store the output in the destination buffer in 1.31 format. */
|
||||
// *pOut++ = (q31_t) (acc >> (32 - shift));
|
||||
|
||||
/* decrement the loop counter */
|
||||
sample--;
|
||||
}
|
||||
|
||||
/* The first stage output is given as input to the second stage. */
|
||||
pIn = pDst;
|
||||
|
||||
/* Reset to destination buffer working pointer */
|
||||
pOut = pDst;
|
||||
|
||||
/* Store the updated state variables back into the pState array */
|
||||
/* Store the updated state variables back into the pState array */
|
||||
*pState++ = (q63_t) Xn1;
|
||||
*pState++ = (q63_t) Xn2;
|
||||
*pState++ = Yn1;
|
||||
*pState++ = Yn2;
|
||||
|
||||
} while(--stage);
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
do
|
||||
{
|
||||
/* Reading the coefficients */
|
||||
b0 = *pCoeffs++;
|
||||
b1 = *pCoeffs++;
|
||||
b2 = *pCoeffs++;
|
||||
a1 = *pCoeffs++;
|
||||
a2 = *pCoeffs++;
|
||||
|
||||
/* Reading the state values */
|
||||
Xn1 = pState[0];
|
||||
Xn2 = pState[1];
|
||||
Yn1 = pState[2];
|
||||
Yn2 = pState[3];
|
||||
|
||||
/* The variable acc hold output value that is being computed and
|
||||
* stored in the destination buffer
|
||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||
*/
|
||||
|
||||
sample = blockSize;
|
||||
|
||||
while(sample > 0u)
|
||||
{
|
||||
/* Read the input */
|
||||
Xn = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
/* acc = b0 * x[n] */
|
||||
acc = (q63_t) Xn *b0;
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc += (q63_t) Xn1 *b1;
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc += (q63_t) Xn2 *b2;
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc += mult32x64(Yn1, a1);
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc += mult32x64(Yn2, a2);
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
Xn2 = Xn1;
|
||||
Xn1 = Xn;
|
||||
Yn2 = Yn1;
|
||||
|
||||
/* The result is converted to 1.63, Yn1 variable is reused */
|
||||
Yn1 = acc << shift;
|
||||
|
||||
/* Calc lower part of acc */
|
||||
acc_l = acc & 0xffffffff;
|
||||
|
||||
/* Calc upper part of acc */
|
||||
acc_h = (acc >> 32) & 0xffffffff;
|
||||
|
||||
/* Apply shift for lower part of acc and upper part of acc */
|
||||
acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||
|
||||
/* Store the output in the destination buffer in 1.31 format. */
|
||||
*pOut++ = acc_h;
|
||||
|
||||
//Yn1 = acc << shift;
|
||||
|
||||
/* Store the output in the destination buffer in 1.31 format. */
|
||||
//*pOut++ = (q31_t) (acc >> (32 - shift));
|
||||
|
||||
/* decrement the loop counter */
|
||||
sample--;
|
||||
}
|
||||
|
||||
/* The first stage output is given as input to the second stage. */
|
||||
pIn = pDst;
|
||||
|
||||
/* Reset to destination buffer working pointer */
|
||||
pOut = pDst;
|
||||
|
||||
/* Store the updated state variables back into the pState array */
|
||||
*pState++ = (q63_t) Xn1;
|
||||
*pState++ = (q63_t) Xn2;
|
||||
*pState++ = Yn1;
|
||||
*pState++ = Yn2;
|
||||
|
||||
} while(--stage);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BiquadCascadeDF1_32x64 group
|
||||
*/
|
|
@ -1,421 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_biquad_cascade_df1_f32.c
|
||||
*
|
||||
* Description: Processing function for the
|
||||
* floating-point Biquad cascade DirectFormI(DF1) filter.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.5 2010/04/26
|
||||
* incorporated review comments and updated with latest CMSIS layer
|
||||
*
|
||||
* Version 0.0.3 2010/03/10
|
||||
* Initial version
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFilters
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup BiquadCascadeDF1 Biquad Cascade IIR Filters Using Direct Form I Structure
|
||||
*
|
||||
* This set of functions implements arbitrary order recursive (IIR) filters.
|
||||
* The filters are implemented as a cascade of second order Biquad sections.
|
||||
* The functions support Q15, Q31 and floating-point data types.
|
||||
* Fast version of Q15 and Q31 also supported on CortexM4 and Cortex-M3.
|
||||
*
|
||||
* The functions operate on blocks of input and output data and each call to the function
|
||||
* processes <code>blockSize</code> samples through the filter.
|
||||
* <code>pSrc</code> points to the array of input data and
|
||||
* <code>pDst</code> points to the array of output data.
|
||||
* Both arrays contain <code>blockSize</code> values.
|
||||
*
|
||||
* \par Algorithm
|
||||
* Each Biquad stage implements a second order filter using the difference equation:
|
||||
* <pre>
|
||||
* y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||
* </pre>
|
||||
* A Direct Form I algorithm is used with 5 coefficients and 4 state variables per stage.
|
||||
* \image html Biquad.gif "Single Biquad filter stage"
|
||||
* Coefficients <code>b0, b1 and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.
|
||||
* Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.
|
||||
* Pay careful attention to the sign of the feedback coefficients.
|
||||
* Some design tools use the difference equation
|
||||
* <pre>
|
||||
* y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] - a1 * y[n-1] - a2 * y[n-2]
|
||||
* </pre>
|
||||
* In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.
|
||||
*
|
||||
* \par
|
||||
* Higher order filters are realized as a cascade of second order sections.
|
||||
* <code>numStages</code> refers to the number of second order stages used.
|
||||
* For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.
|
||||
* \image html BiquadCascade.gif "8th order filter using a cascade of Biquad stages"
|
||||
* A 9th order filter would be realized with <code>numStages=5</code> second order stages with the coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).
|
||||
*
|
||||
* \par
|
||||
* The <code>pState</code> points to state variables array.
|
||||
* Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
|
||||
* The state variables are arranged in the <code>pState</code> array as:
|
||||
* <pre>
|
||||
* {x[n-1], x[n-2], y[n-1], y[n-2]}
|
||||
* </pre>
|
||||
*
|
||||
* \par
|
||||
* The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
|
||||
* The state array has a total length of <code>4*numStages</code> values.
|
||||
* The state variables are updated after each block of data is processed, the coefficients are untouched.
|
||||
*
|
||||
* \par Instance Structure
|
||||
* The coefficients and state variables for a filter are stored together in an instance data structure.
|
||||
* A separate instance structure must be defined for each filter.
|
||||
* Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
|
||||
* There are separate instance structure declarations for each of the 3 supported data types.
|
||||
*
|
||||
* \par Init Functions
|
||||
* There is also an associated initialization function for each data type.
|
||||
* The initialization function performs following operations:
|
||||
* - Sets the values of the internal structure fields.
|
||||
* - Zeros out the values in the state buffer.
|
||||
*
|
||||
* \par
|
||||
* Use of the initialization function is optional.
|
||||
* However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
|
||||
* To place an instance structure into a const data section, the instance structure must be manually initialized.
|
||||
* Set the values in the state buffer to zeros before static initialization.
|
||||
* The code below statically initializes each of the 3 different data type filter instance structures
|
||||
* <pre>
|
||||
* arm_biquad_casd_df1_inst_f32 S1 = {numStages, pState, pCoeffs};
|
||||
* arm_biquad_casd_df1_inst_q15 S2 = {numStages, pState, pCoeffs, postShift};
|
||||
* arm_biquad_casd_df1_inst_q31 S3 = {numStages, pState, pCoeffs, postShift};
|
||||
* </pre>
|
||||
* where <code>numStages</code> is the number of Biquad stages in the filter; <code>pState</code> is the address of the state buffer;
|
||||
* <code>pCoeffs</code> is the address of the coefficient buffer; <code>postShift</code> shift to be applied.
|
||||
*
|
||||
* \par Fixed-Point Behavior
|
||||
* Care must be taken when using the Q15 and Q31 versions of the Biquad Cascade filter functions.
|
||||
* Following issues must be considered:
|
||||
* - Scaling of coefficients
|
||||
* - Filter gain
|
||||
* - Overflow and saturation
|
||||
*
|
||||
* \par
|
||||
* <b>Scaling of coefficients: </b>
|
||||
* Filter coefficients are represented as fractional values and
|
||||
* coefficients are restricted to lie in the range <code>[-1 +1)</code>.
|
||||
* The fixed-point functions have an additional scaling parameter <code>postShift</code>
|
||||
* which allow the filter coefficients to exceed the range <code>[+1 -1)</code>.
|
||||
* At the output of the filter's accumulator is a shift register which shifts the result by <code>postShift</code> bits.
|
||||
* \image html BiquadPostshift.gif "Fixed-point Biquad with shift by postShift bits after accumulator"
|
||||
* This essentially scales the filter coefficients by <code>2^postShift</code>.
|
||||
* For example, to realize the coefficients
|
||||
* <pre>
|
||||
* {1.5, -0.8, 1.2, 1.6, -0.9}
|
||||
* </pre>
|
||||
* set the pCoeffs array to:
|
||||
* <pre>
|
||||
* {0.75, -0.4, 0.6, 0.8, -0.45}
|
||||
* </pre>
|
||||
* and set <code>postShift=1</code>
|
||||
*
|
||||
* \par
|
||||
* <b>Filter gain: </b>
|
||||
* The frequency response of a Biquad filter is a function of its coefficients.
|
||||
* It is possible for the gain through the filter to exceed 1.0 meaning that the filter increases the amplitude of certain frequencies.
|
||||
* This means that an input signal with amplitude < 1.0 may result in an output > 1.0 and these are saturated or overflowed based on the implementation of the filter.
|
||||
* To avoid this behavior the filter needs to be scaled down such that its peak gain < 1.0 or the input signal must be scaled down so that the combination of input and filter are never overflowed.
|
||||
*
|
||||
* \par
|
||||
* <b>Overflow and saturation: </b>
|
||||
* For Q15 and Q31 versions, it is described separately as part of the function specific documentation below.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BiquadCascadeDF1
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @param[in] *S points to an instance of the floating-point Biquad cascade structure.
|
||||
* @param[in] *pSrc points to the block of input data.
|
||||
* @param[out] *pDst points to the block of output data.
|
||||
* @param[in] blockSize number of samples to process per call.
|
||||
* @return none.
|
||||
*
|
||||
*/
|
||||
|
||||
void arm_biquad_cascade_df1_f32(
|
||||
const arm_biquad_casd_df1_inst_f32 * S,
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
float32_t *pIn = pSrc; /* source pointer */
|
||||
float32_t *pOut = pDst; /* destination pointer */
|
||||
float32_t *pState = S->pState; /* pState pointer */
|
||||
float32_t *pCoeffs = S->pCoeffs; /* coefficient pointer */
|
||||
float32_t acc; /* Simulates the accumulator */
|
||||
float32_t b0, b1, b2, a1, a2; /* Filter coefficients */
|
||||
float32_t Xn1, Xn2, Yn1, Yn2; /* Filter pState variables */
|
||||
float32_t Xn; /* temporary input */
|
||||
uint32_t sample, stage = S->numStages; /* loop counters */
|
||||
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
do
|
||||
{
|
||||
/* Reading the coefficients */
|
||||
b0 = *pCoeffs++;
|
||||
b1 = *pCoeffs++;
|
||||
b2 = *pCoeffs++;
|
||||
a1 = *pCoeffs++;
|
||||
a2 = *pCoeffs++;
|
||||
|
||||
/* Reading the pState values */
|
||||
Xn1 = pState[0];
|
||||
Xn2 = pState[1];
|
||||
Yn1 = pState[2];
|
||||
Yn2 = pState[3];
|
||||
|
||||
/* Apply loop unrolling and compute 4 output values simultaneously. */
|
||||
/* The variable acc hold output values that are being computed:
|
||||
*
|
||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||
*/
|
||||
|
||||
sample = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(sample > 0u)
|
||||
{
|
||||
/* Read the first input */
|
||||
Xn = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
Yn2 = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2);
|
||||
|
||||
/* Store the result in the accumulator in the destination buffer. */
|
||||
*pOut++ = Yn2;
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
|
||||
/* Read the second input */
|
||||
Xn2 = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
Yn1 = (b0 * Xn2) + (b1 * Xn) + (b2 * Xn1) + (a1 * Yn2) + (a2 * Yn1);
|
||||
|
||||
/* Store the result in the accumulator in the destination buffer. */
|
||||
*pOut++ = Yn1;
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
|
||||
/* Read the third input */
|
||||
Xn1 = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
Yn2 = (b0 * Xn1) + (b1 * Xn2) + (b2 * Xn) + (a1 * Yn1) + (a2 * Yn2);
|
||||
|
||||
/* Store the result in the accumulator in the destination buffer. */
|
||||
*pOut++ = Yn2;
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
|
||||
/* Read the forth input */
|
||||
Xn = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
Yn1 = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn2) + (a2 * Yn1);
|
||||
|
||||
/* Store the result in the accumulator in the destination buffer. */
|
||||
*pOut++ = Yn1;
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
Xn2 = Xn1;
|
||||
Xn1 = Xn;
|
||||
|
||||
/* decrement the loop counter */
|
||||
sample--;
|
||||
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
sample = blockSize & 0x3u;
|
||||
|
||||
while(sample > 0u)
|
||||
{
|
||||
/* Read the input */
|
||||
Xn = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
acc = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2);
|
||||
|
||||
/* Store the result in the accumulator in the destination buffer. */
|
||||
*pOut++ = acc;
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
Xn2 = Xn1;
|
||||
Xn1 = Xn;
|
||||
Yn2 = Yn1;
|
||||
Yn1 = acc;
|
||||
|
||||
/* decrement the loop counter */
|
||||
sample--;
|
||||
|
||||
}
|
||||
|
||||
/* Store the updated state variables back into the pState array */
|
||||
*pState++ = Xn1;
|
||||
*pState++ = Xn2;
|
||||
*pState++ = Yn1;
|
||||
*pState++ = Yn2;
|
||||
|
||||
/* The first stage goes from the input buffer to the output buffer. */
|
||||
/* Subsequent numStages occur in-place in the output buffer */
|
||||
pIn = pDst;
|
||||
|
||||
/* Reset the output pointer */
|
||||
pOut = pDst;
|
||||
|
||||
/* decrement the loop counter */
|
||||
stage--;
|
||||
|
||||
} while(stage > 0u);
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
do
|
||||
{
|
||||
/* Reading the coefficients */
|
||||
b0 = *pCoeffs++;
|
||||
b1 = *pCoeffs++;
|
||||
b2 = *pCoeffs++;
|
||||
a1 = *pCoeffs++;
|
||||
a2 = *pCoeffs++;
|
||||
|
||||
/* Reading the pState values */
|
||||
Xn1 = pState[0];
|
||||
Xn2 = pState[1];
|
||||
Yn1 = pState[2];
|
||||
Yn2 = pState[3];
|
||||
|
||||
/* The variables acc holds the output value that is computed:
|
||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||
*/
|
||||
|
||||
sample = blockSize;
|
||||
|
||||
while(sample > 0u)
|
||||
{
|
||||
/* Read the input */
|
||||
Xn = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
acc = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2);
|
||||
|
||||
/* Store the result in the accumulator in the destination buffer. */
|
||||
*pOut++ = acc;
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
Xn2 = Xn1;
|
||||
Xn1 = Xn;
|
||||
Yn2 = Yn1;
|
||||
Yn1 = acc;
|
||||
|
||||
/* decrement the loop counter */
|
||||
sample--;
|
||||
}
|
||||
|
||||
/* Store the updated state variables back into the pState array */
|
||||
*pState++ = Xn1;
|
||||
*pState++ = Xn2;
|
||||
*pState++ = Yn1;
|
||||
*pState++ = Yn2;
|
||||
|
||||
/* The first stage goes from the input buffer to the output buffer. */
|
||||
/* Subsequent numStages occur in-place in the output buffer */
|
||||
pIn = pDst;
|
||||
|
||||
/* Reset the output pointer */
|
||||
pOut = pDst;
|
||||
|
||||
/* decrement the loop counter */
|
||||
stage--;
|
||||
|
||||
} while(stage > 0u);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @} end of BiquadCascadeDF1 group
|
||||
*/
|
|
@ -1,283 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_biquad_cascade_df1_fast_q15.c
|
||||
*
|
||||
* Description: Fast processing function for the
|
||||
* Q15 Biquad cascade filter.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.9 2010/08/16
|
||||
* Initial version
|
||||
*
|
||||
*
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFilters
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BiquadCascadeDF1
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @details
|
||||
* @param[in] *S points to an instance of the Q15 Biquad cascade structure.
|
||||
* @param[in] *pSrc points to the block of input data.
|
||||
* @param[out] *pDst points to the block of output data.
|
||||
* @param[in] blockSize number of samples to process per call.
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* This fast version uses a 32-bit accumulator with 2.30 format.
|
||||
* The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit.
|
||||
* Thus, if the accumulator result overflows it wraps around and distorts the result.
|
||||
* In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25).
|
||||
* The 2.30 accumulator is then shifted by <code>postShift</code> bits and the result truncated to 1.15 format by discarding the low 16 bits.
|
||||
*
|
||||
* \par
|
||||
* Refer to the function <code>arm_biquad_cascade_df1_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion. Both the slow and the fast versions use the same instance structure.
|
||||
* Use the function <code>arm_biquad_cascade_df1_init_q15()</code> to initialize the filter structure.
|
||||
*
|
||||
*/
|
||||
|
||||
void arm_biquad_cascade_df1_fast_q15(
|
||||
const arm_biquad_casd_df1_inst_q15 * S,
|
||||
q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
q15_t *pIn = pSrc; /* Source pointer */
|
||||
q15_t *pOut = pDst; /* Destination pointer */
|
||||
q31_t in; /* Temporary variable to hold input value */
|
||||
q31_t out; /* Temporary variable to hold output value */
|
||||
q31_t b0; /* Temporary variable to hold bo value */
|
||||
q31_t b1, a1; /* Filter coefficients */
|
||||
q31_t state_in, state_out; /* Filter state variables */
|
||||
q31_t acc; /* Accumulator */
|
||||
int32_t shift = (int32_t) (15 - S->postShift); /* Post shift */
|
||||
q15_t *pState = S->pState; /* State pointer */
|
||||
q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
|
||||
uint32_t sample, stage = S->numStages; /* Stage loop counter */
|
||||
|
||||
|
||||
|
||||
do
|
||||
{
|
||||
|
||||
/* Read the b0 and 0 coefficients using SIMD */
|
||||
b0 = *__SIMD32(pCoeffs)++;
|
||||
|
||||
/* Read the b1 and b2 coefficients using SIMD */
|
||||
b1 = *__SIMD32(pCoeffs)++;
|
||||
|
||||
/* Read the a1 and a2 coefficients using SIMD */
|
||||
a1 = *__SIMD32(pCoeffs)++;
|
||||
|
||||
/* Read the input state values from the state buffer: x[n-1], x[n-2] */
|
||||
state_in = *__SIMD32(pState)++;
|
||||
|
||||
/* Read the output state values from the state buffer: y[n-1], y[n-2] */
|
||||
state_out = *__SIMD32(pState)--;
|
||||
|
||||
/* Apply loop unrolling and compute 2 output values simultaneously. */
|
||||
/* The variable acc hold output values that are being computed:
|
||||
*
|
||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||
*/
|
||||
sample = blockSize >> 1u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 2 outputs at a time.
|
||||
** a second loop below computes the remaining 1 sample. */
|
||||
while(sample > 0u)
|
||||
{
|
||||
|
||||
/* Read the input */
|
||||
in = *__SIMD32(pIn)++;
|
||||
|
||||
/* out = b0 * x[n] + 0 * 0 */
|
||||
out = __SMUAD(b0, in);
|
||||
/* acc = b1 * x[n-1] + acc += b2 * x[n-2] + out */
|
||||
acc = __SMLAD(b1, state_in, out);
|
||||
/* acc += a1 * y[n-1] + acc += a2 * y[n-2] */
|
||||
acc = __SMLAD(a1, state_out, acc);
|
||||
|
||||
/* The result is converted from 3.29 to 1.31 and then saturation is applied */
|
||||
out = __SSAT((acc >> shift), 16);
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
||||
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
state_in = __PKHBT(in, state_in, 16);
|
||||
state_out = __PKHBT(out, state_out, 16);
|
||||
|
||||
#else
|
||||
|
||||
state_in = __PKHBT(state_in >> 16, (in >> 16), 16);
|
||||
state_out = __PKHBT(state_out >> 16, (out), 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* out = b0 * x[n] + 0 * 0 */
|
||||
out = __SMUADX(b0, in);
|
||||
/* acc0 = b1 * x[n-1] , acc0 += b2 * x[n-2] + out */
|
||||
acc = __SMLAD(b1, state_in, out);
|
||||
/* acc += a1 * y[n-1] + acc += a2 * y[n-2] */
|
||||
acc = __SMLAD(a1, state_out, acc);
|
||||
|
||||
/* The result is converted from 3.29 to 1.31 and then saturation is applied */
|
||||
out = __SSAT((acc >> shift), 16);
|
||||
|
||||
|
||||
/* Store the output in the destination buffer. */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*__SIMD32(pOut)++ = __PKHBT(state_out, out, 16);
|
||||
|
||||
#else
|
||||
|
||||
*__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
||||
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
state_in = __PKHBT(in >> 16, state_in, 16);
|
||||
state_out = __PKHBT(out, state_out, 16);
|
||||
|
||||
#else
|
||||
|
||||
state_in = __PKHBT(state_in >> 16, in, 16);
|
||||
state_out = __PKHBT(state_out >> 16, out, 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
|
||||
/* Decrement the loop counter */
|
||||
sample--;
|
||||
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 2, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
|
||||
if((blockSize & 0x1u) != 0u)
|
||||
{
|
||||
/* Read the input */
|
||||
in = *pIn++;
|
||||
|
||||
/* out = b0 * x[n] + 0 * 0 */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
out = __SMUAD(b0, in);
|
||||
|
||||
#else
|
||||
|
||||
out = __SMUADX(b0, in);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* acc = b1 * x[n-1], acc += b2 * x[n-2] + out */
|
||||
acc = __SMLAD(b1, state_in, out);
|
||||
/* acc += a1 * y[n-1] + acc += a2 * y[n-2] */
|
||||
acc = __SMLAD(a1, state_out, acc);
|
||||
|
||||
/* The result is converted from 3.29 to 1.31 and then saturation is applied */
|
||||
out = __SSAT((acc >> shift), 16);
|
||||
|
||||
/* Store the output in the destination buffer. */
|
||||
*pOut++ = (q15_t) out;
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
||||
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
state_in = __PKHBT(in, state_in, 16);
|
||||
state_out = __PKHBT(out, state_out, 16);
|
||||
|
||||
#else
|
||||
|
||||
state_in = __PKHBT(state_in >> 16, in, 16);
|
||||
state_out = __PKHBT(state_out >> 16, out, 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
}
|
||||
|
||||
/* The first stage goes from the input buffer to the output buffer. */
|
||||
/* Subsequent (numStages - 1) occur in-place in the output buffer */
|
||||
pIn = pDst;
|
||||
|
||||
/* Reset the output pointer */
|
||||
pOut = pDst;
|
||||
|
||||
/* Store the updated state variables back into the state array */
|
||||
*__SIMD32(pState)++ = state_in;
|
||||
*__SIMD32(pState)++ = state_out;
|
||||
|
||||
|
||||
/* Decrement the loop counter */
|
||||
stage--;
|
||||
|
||||
} while(stage > 0u);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @} end of BiquadCascadeDF1 group
|
||||
*/
|
|
@ -1,275 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_biquad_cascade_df1_fast_q31.c
|
||||
*
|
||||
* Description: Processing function for the
|
||||
* Q31 Fast Biquad cascade DirectFormI(DF1) filter.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.9 2010/08/27
|
||||
* Initial version
|
||||
*
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFilters
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BiquadCascadeDF1
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @details
|
||||
*
|
||||
* @param[in] *S points to an instance of the Q31 Biquad cascade structure.
|
||||
* @param[in] *pSrc points to the block of input data.
|
||||
* @param[out] *pDst points to the block of output data.
|
||||
* @param[in] blockSize number of samples to process per call.
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* This function is optimized for speed at the expense of fixed-point precision and overflow protection.
|
||||
* The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format.
|
||||
* These intermediate results are added to a 2.30 accumulator.
|
||||
* Finally, the accumulator is saturated and converted to a 1.31 result.
|
||||
* The fast version has the same overflow behavior as the standard version and provides less precision since it discards the low 32 bits of each multiplication result.
|
||||
* In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25). Use the intialization function
|
||||
* arm_biquad_cascade_df1_init_q31() to initialize filter structure.
|
||||
*
|
||||
* \par
|
||||
* Refer to the function <code>arm_biquad_cascade_df1_q31()</code> for a slower implementation of this function which uses 64-bit accumulation to provide higher precision. Both the slow and the fast versions use the same instance structure.
|
||||
* Use the function <code>arm_biquad_cascade_df1_init_q31()</code> to initialize the filter structure.
|
||||
*/
|
||||
|
||||
void arm_biquad_cascade_df1_fast_q31(
|
||||
const arm_biquad_casd_df1_inst_q31 * S,
|
||||
q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
q31_t acc; /* accumulator */
|
||||
q31_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */
|
||||
q31_t b0, b1, b2, a1, a2; /* Filter coefficients */
|
||||
q31_t *pIn = pSrc; /* input pointer initialization */
|
||||
q31_t *pOut = pDst; /* output pointer initialization */
|
||||
q31_t *pState = S->pState; /* pState pointer initialization */
|
||||
q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */
|
||||
q31_t Xn; /* temporary input */
|
||||
int32_t shift = (int32_t) S->postShift + 1; /* Shift to be applied to the output */
|
||||
uint32_t sample, stage = S->numStages; /* loop counters */
|
||||
|
||||
|
||||
do
|
||||
{
|
||||
/* Reading the coefficients */
|
||||
b0 = *pCoeffs++;
|
||||
b1 = *pCoeffs++;
|
||||
b2 = *pCoeffs++;
|
||||
a1 = *pCoeffs++;
|
||||
a2 = *pCoeffs++;
|
||||
|
||||
/* Reading the state values */
|
||||
Xn1 = pState[0];
|
||||
Xn2 = pState[1];
|
||||
Yn1 = pState[2];
|
||||
Yn2 = pState[3];
|
||||
|
||||
/* Apply loop unrolling and compute 4 output values simultaneously. */
|
||||
/* The variables acc ... acc3 hold output values that are being computed:
|
||||
*
|
||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||
*/
|
||||
|
||||
sample = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(sample > 0u)
|
||||
{
|
||||
/* Read the input */
|
||||
Xn = *pIn;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
/* acc = b0 * x[n] */
|
||||
acc = (q31_t) (((q63_t) b1 * Xn1) >> 32);
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b0 * (Xn))) >> 32);
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);
|
||||
|
||||
/* The result is converted to 1.31 , Yn2 variable is reused */
|
||||
Yn2 = acc << shift;
|
||||
|
||||
/* Read the second input */
|
||||
Xn2 = *(pIn + 1u);
|
||||
|
||||
/* Store the output in the destination buffer. */
|
||||
*pOut = Yn2;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
/* acc = b0 * x[n] */
|
||||
acc = (q31_t) (((q63_t) b0 * (Xn2)) >> 32);
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn))) >> 32);
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn1))) >> 32);
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32);
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32);
|
||||
|
||||
/* The result is converted to 1.31, Yn1 variable is reused */
|
||||
Yn1 = acc << shift;
|
||||
|
||||
/* Read the third input */
|
||||
Xn1 = *(pIn + 2u);
|
||||
|
||||
/* Store the output in the destination buffer. */
|
||||
*(pOut + 1u) = Yn1;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
/* acc = b0 * x[n] */
|
||||
acc = (q31_t) (((q63_t) b0 * (Xn1)) >> 32);
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn2))) >> 32);
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn))) >> 32);
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);
|
||||
|
||||
/* The result is converted to 1.31, Yn2 variable is reused */
|
||||
Yn2 = acc << shift;
|
||||
|
||||
/* Read the forth input */
|
||||
Xn = *(pIn + 3u);
|
||||
|
||||
/* Store the output in the destination buffer. */
|
||||
*(pOut + 2u) = Yn2;
|
||||
pIn += 4u;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
/* acc = b0 * x[n] */
|
||||
acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32);
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32);
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32);
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32);
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
Xn2 = Xn1;
|
||||
|
||||
/* The result is converted to 1.31, Yn1 variable is reused */
|
||||
Yn1 = acc << shift;
|
||||
|
||||
/* Xn1 = Xn */
|
||||
Xn1 = Xn;
|
||||
|
||||
/* Store the output in the destination buffer. */
|
||||
*(pOut + 3u) = Yn1;
|
||||
pOut += 4u;
|
||||
|
||||
/* decrement the loop counter */
|
||||
sample--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
sample = (blockSize & 0x3u);
|
||||
|
||||
while(sample > 0u)
|
||||
{
|
||||
/* Read the input */
|
||||
Xn = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
/* acc = b0 * x[n] */
|
||||
acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32);
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32);
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);
|
||||
/* The result is converted to 1.31 */
|
||||
acc = acc << shift;
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
Xn2 = Xn1;
|
||||
Xn1 = Xn;
|
||||
Yn2 = Yn1;
|
||||
Yn1 = acc;
|
||||
|
||||
/* Store the output in the destination buffer. */
|
||||
*pOut++ = acc;
|
||||
|
||||
/* decrement the loop counter */
|
||||
sample--;
|
||||
}
|
||||
|
||||
/* The first stage goes from the input buffer to the output buffer. */
|
||||
/* Subsequent stages occur in-place in the output buffer */
|
||||
pIn = pDst;
|
||||
|
||||
/* Reset to destination pointer */
|
||||
pOut = pDst;
|
||||
|
||||
/* Store the updated state variables back into the pState array */
|
||||
*pState++ = Xn1;
|
||||
*pState++ = Xn2;
|
||||
*pState++ = Yn1;
|
||||
*pState++ = Yn2;
|
||||
|
||||
} while(--stage);
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BiquadCascadeDF1 group
|
||||
*/
|
|
@ -1,107 +0,0 @@
|
|||
/*-----------------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_biquad_cascade_df1_init_f32.c
|
||||
*
|
||||
* Description: floating-point Biquad cascade DirectFormI(DF1) filter initialization function.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.5 2010/04/26
|
||||
* incorporated review comments and updated with latest CMSIS layer
|
||||
*
|
||||
* Version 0.0.3 2010/03/10
|
||||
* Initial version
|
||||
* ---------------------------------------------------------------------------*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFilters
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BiquadCascadeDF1
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @details
|
||||
* @brief Initialization function for the floating-point Biquad cascade filter.
|
||||
* @param[in,out] *S points to an instance of the floating-point Biquad cascade structure.
|
||||
* @param[in] numStages number of 2nd order stages in the filter.
|
||||
* @param[in] *pCoeffs points to the filter coefficients array.
|
||||
* @param[in] *pState points to the state array.
|
||||
* @return none
|
||||
*
|
||||
*
|
||||
* <b>Coefficient and State Ordering:</b>
|
||||
*
|
||||
* \par
|
||||
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
||||
* <pre>
|
||||
* {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
|
||||
* </pre>
|
||||
*
|
||||
* \par
|
||||
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
||||
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
||||
* and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
|
||||
*
|
||||
* \par
|
||||
* The <code>pState</code> is a pointer to state array.
|
||||
* Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
|
||||
* The state variables are arranged in the <code>pState</code> array as:
|
||||
* <pre>
|
||||
* {x[n-1], x[n-2], y[n-1], y[n-2]}
|
||||
* </pre>
|
||||
* The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
|
||||
* The state array has a total length of <code>4*numStages</code> values.
|
||||
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
||||
*
|
||||
*/
|
||||
|
||||
void arm_biquad_cascade_df1_init_f32(
|
||||
arm_biquad_casd_df1_inst_f32 * S,
|
||||
uint8_t numStages,
|
||||
float32_t * pCoeffs,
|
||||
float32_t * pState)
|
||||
{
|
||||
/* Assign filter stages */
|
||||
S->numStages = numStages;
|
||||
|
||||
/* Assign coefficient pointer */
|
||||
S->pCoeffs = pCoeffs;
|
||||
|
||||
/* Clear state buffer and size is always 4 * numStages */
|
||||
memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(float32_t));
|
||||
|
||||
/* Assign state pointer */
|
||||
S->pState = pState;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BiquadCascadeDF1 group
|
||||
*/
|
|
@ -1,109 +0,0 @@
|
|||
/*-----------------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_biquad_cascade_df1_init_q15.c
|
||||
*
|
||||
* Description: Q15 Biquad cascade DirectFormI(DF1) filter initialization function.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.5 2010/04/26
|
||||
* incorporated review comments and updated with latest CMSIS layer
|
||||
*
|
||||
* Version 0.0.3 2010/03/10
|
||||
* Initial version
|
||||
* ---------------------------------------------------------------------------*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFilters
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BiquadCascadeDF1
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @details
|
||||
*
|
||||
* @param[in,out] *S points to an instance of the Q15 Biquad cascade structure.
|
||||
* @param[in] numStages number of 2nd order stages in the filter.
|
||||
* @param[in] *pCoeffs points to the filter coefficients.
|
||||
* @param[in] *pState points to the state buffer.
|
||||
* @param[in] postShift Shift to be applied to the accumulator result. Varies according to the coefficients format
|
||||
* @return none
|
||||
*
|
||||
* <b>Coefficient and State Ordering:</b>
|
||||
*
|
||||
* \par
|
||||
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
||||
* <pre>
|
||||
* {b10, 0, b11, b12, a11, a12, b20, 0, b21, b22, a21, a22, ...}
|
||||
* </pre>
|
||||
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
||||
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
||||
* and so on. The <code>pCoeffs</code> array contains a total of <code>6*numStages</code> values.
|
||||
* The zero coefficient between <code>b1</code> and <code>b2</code> facilities use of 16-bit SIMD instructions on the Cortex-M4.
|
||||
*
|
||||
* \par
|
||||
* The state variables are stored in the array <code>pState</code>.
|
||||
* Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
|
||||
* The state variables are arranged in the <code>pState</code> array as:
|
||||
* <pre>
|
||||
* {x[n-1], x[n-2], y[n-1], y[n-2]}
|
||||
* </pre>
|
||||
* The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
|
||||
* The state array has a total length of <code>4*numStages</code> values.
|
||||
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
||||
*/
|
||||
|
||||
void arm_biquad_cascade_df1_init_q15(
|
||||
arm_biquad_casd_df1_inst_q15 * S,
|
||||
uint8_t numStages,
|
||||
q15_t * pCoeffs,
|
||||
q15_t * pState,
|
||||
int8_t postShift)
|
||||
{
|
||||
/* Assign filter stages */
|
||||
S->numStages = numStages;
|
||||
|
||||
/* Assign postShift to be applied to the output */
|
||||
S->postShift = postShift;
|
||||
|
||||
/* Assign coefficient pointer */
|
||||
S->pCoeffs = pCoeffs;
|
||||
|
||||
/* Clear state buffer and size is always 4 * numStages */
|
||||
memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(q15_t));
|
||||
|
||||
/* Assign state pointer */
|
||||
S->pState = pState;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BiquadCascadeDF1 group
|
||||
*/
|
|
@ -1,109 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_biquad_cascade_df1_init_q31.c
|
||||
*
|
||||
* Description: Q31 Biquad cascade DirectFormI(DF1) filter initialization function.
|
||||
*
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.5 2010/04/26
|
||||
* incorporated review comments and updated with latest CMSIS layer
|
||||
*
|
||||
* Version 0.0.3 2010/03/10
|
||||
* Initial version
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFilters
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BiquadCascadeDF1
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @details
|
||||
*
|
||||
* @param[in,out] *S points to an instance of the Q31 Biquad cascade structure.
|
||||
* @param[in] numStages number of 2nd order stages in the filter.
|
||||
* @param[in] *pCoeffs points to the filter coefficients buffer.
|
||||
* @param[in] *pState points to the state buffer.
|
||||
* @param[in] postShift Shift to be applied after the accumulator. Varies according to the coefficients format
|
||||
* @return none
|
||||
*
|
||||
* <b>Coefficient and State Ordering:</b>
|
||||
*
|
||||
* \par
|
||||
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
||||
* <pre>
|
||||
* {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
|
||||
* </pre>
|
||||
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
||||
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
||||
* and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
|
||||
*
|
||||
* \par
|
||||
* The <code>pState</code> points to state variables array.
|
||||
* Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
|
||||
* The state variables are arranged in the <code>pState</code> array as:
|
||||
* <pre>
|
||||
* {x[n-1], x[n-2], y[n-1], y[n-2]}
|
||||
* </pre>
|
||||
* The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
|
||||
* The state array has a total length of <code>4*numStages</code> values.
|
||||
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
||||
*/
|
||||
|
||||
void arm_biquad_cascade_df1_init_q31(
|
||||
arm_biquad_casd_df1_inst_q31 * S,
|
||||
uint8_t numStages,
|
||||
q31_t * pCoeffs,
|
||||
q31_t * pState,
|
||||
int8_t postShift)
|
||||
{
|
||||
/* Assign filter stages */
|
||||
S->numStages = numStages;
|
||||
|
||||
/* Assign postShift to be applied to the output */
|
||||
S->postShift = postShift;
|
||||
|
||||
/* Assign coefficient pointer */
|
||||
S->pCoeffs = pCoeffs;
|
||||
|
||||
/* Clear state buffer and size is always 4 * numStages */
|
||||
memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(q31_t));
|
||||
|
||||
/* Assign state pointer */
|
||||
S->pState = pState;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BiquadCascadeDF1 group
|
||||
*/
|
|
@ -1,408 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_biquad_cascade_df1_q15.c
|
||||
*
|
||||
* Description: Processing function for the
|
||||
* Q15 Biquad cascade DirectFormI(DF1) filter.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.5 2010/04/26
|
||||
* incorporated review comments and updated with latest CMSIS layer
|
||||
*
|
||||
* Version 0.0.3 2010/03/10
|
||||
* Initial version
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFilters
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BiquadCascadeDF1
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Processing function for the Q15 Biquad cascade filter.
|
||||
* @param[in] *S points to an instance of the Q15 Biquad cascade structure.
|
||||
* @param[in] *pSrc points to the block of input data.
|
||||
* @param[out] *pDst points to the location where the output result is written.
|
||||
* @param[in] blockSize number of samples to process per call.
|
||||
* @return none.
|
||||
*
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function is implemented using a 64-bit internal accumulator.
|
||||
* Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
|
||||
* The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
|
||||
* There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
|
||||
* The accumulator is then shifted by <code>postShift</code> bits to truncate the result to 1.15 format by discarding the low 16 bits.
|
||||
* Finally, the result is saturated to 1.15 format.
|
||||
*
|
||||
* \par
|
||||
* Refer to the function <code>arm_biquad_cascade_df1_fast_q15()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4.
|
||||
*/
|
||||
|
||||
void arm_biquad_cascade_df1_q15(
|
||||
const arm_biquad_casd_df1_inst_q15 * S,
|
||||
q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q15_t *pIn = pSrc; /* Source pointer */
|
||||
q15_t *pOut = pDst; /* Destination pointer */
|
||||
q31_t in; /* Temporary variable to hold input value */
|
||||
q31_t out; /* Temporary variable to hold output value */
|
||||
q31_t b0; /* Temporary variable to hold bo value */
|
||||
q31_t b1, a1; /* Filter coefficients */
|
||||
q31_t state_in, state_out; /* Filter state variables */
|
||||
q31_t acc_l, acc_h;
|
||||
q63_t acc; /* Accumulator */
|
||||
int32_t lShift = (15 - (int32_t) S->postShift); /* Post shift */
|
||||
q15_t *pState = S->pState; /* State pointer */
|
||||
q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
|
||||
uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */
|
||||
int32_t uShift = (32 - lShift);
|
||||
|
||||
do
|
||||
{
|
||||
/* Read the b0 and 0 coefficients using SIMD */
|
||||
b0 = *__SIMD32(pCoeffs)++;
|
||||
|
||||
/* Read the b1 and b2 coefficients using SIMD */
|
||||
b1 = *__SIMD32(pCoeffs)++;
|
||||
|
||||
/* Read the a1 and a2 coefficients using SIMD */
|
||||
a1 = *__SIMD32(pCoeffs)++;
|
||||
|
||||
/* Read the input state values from the state buffer: x[n-1], x[n-2] */
|
||||
state_in = *__SIMD32(pState)++;
|
||||
|
||||
/* Read the output state values from the state buffer: y[n-1], y[n-2] */
|
||||
state_out = *__SIMD32(pState)--;
|
||||
|
||||
/* Apply loop unrolling and compute 2 output values simultaneously. */
|
||||
/* The variable acc hold output values that are being computed:
|
||||
*
|
||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||
*/
|
||||
sample = blockSize >> 1u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 2 outputs at a time.
|
||||
** a second loop below computes the remaining 1 sample. */
|
||||
while(sample > 0u)
|
||||
{
|
||||
|
||||
/* Read the input */
|
||||
in = *__SIMD32(pIn)++;
|
||||
|
||||
/* out = b0 * x[n] + 0 * 0 */
|
||||
out = __SMUAD(b0, in);
|
||||
|
||||
/* acc += b1 * x[n-1] + b2 * x[n-2] + out */
|
||||
acc = __SMLALD(b1, state_in, out);
|
||||
/* acc += a1 * y[n-1] + a2 * y[n-2] */
|
||||
acc = __SMLALD(a1, state_out, acc);
|
||||
|
||||
/* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
|
||||
/* Calc lower part of acc */
|
||||
acc_l = acc & 0xffffffff;
|
||||
|
||||
/* Calc upper part of acc */
|
||||
acc_h = (acc >> 32) & 0xffffffff;
|
||||
|
||||
/* Apply shift for lower part of acc and upper part of acc */
|
||||
out = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||
|
||||
out = __SSAT(out, 16);
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
||||
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
state_in = __PKHBT(in, state_in, 16);
|
||||
state_out = __PKHBT(out, state_out, 16);
|
||||
|
||||
#else
|
||||
|
||||
state_in = __PKHBT(state_in >> 16, (in >> 16), 16);
|
||||
state_out = __PKHBT(state_out >> 16, (out), 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* out = b0 * x[n] + 0 * 0 */
|
||||
out = __SMUADX(b0, in);
|
||||
/* acc += b1 * x[n-1] + b2 * x[n-2] + out */
|
||||
acc = __SMLALD(b1, state_in, out);
|
||||
/* acc += a1 * y[n-1] + a2 * y[n-2] */
|
||||
acc = __SMLALD(a1, state_out, acc);
|
||||
|
||||
/* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
|
||||
/* Calc lower part of acc */
|
||||
acc_l = acc & 0xffffffff;
|
||||
|
||||
/* Calc upper part of acc */
|
||||
acc_h = (acc >> 32) & 0xffffffff;
|
||||
|
||||
/* Apply shift for lower part of acc and upper part of acc */
|
||||
out = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||
|
||||
out = __SSAT(out, 16);
|
||||
|
||||
/* Store the output in the destination buffer. */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*__SIMD32(pOut)++ = __PKHBT(state_out, out, 16);
|
||||
|
||||
#else
|
||||
|
||||
*__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
||||
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
state_in = __PKHBT(in >> 16, state_in, 16);
|
||||
state_out = __PKHBT(out, state_out, 16);
|
||||
|
||||
#else
|
||||
|
||||
state_in = __PKHBT(state_in >> 16, in, 16);
|
||||
state_out = __PKHBT(state_out >> 16, out, 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
|
||||
/* Decrement the loop counter */
|
||||
sample--;
|
||||
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 2, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
|
||||
if((blockSize & 0x1u) != 0u)
|
||||
{
|
||||
/* Read the input */
|
||||
in = *pIn++;
|
||||
|
||||
/* out = b0 * x[n] + 0 * 0 */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
out = __SMUAD(b0, in);
|
||||
|
||||
#else
|
||||
|
||||
out = __SMUADX(b0, in);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* acc = b1 * x[n-1] + b2 * x[n-2] + out */
|
||||
acc = __SMLALD(b1, state_in, out);
|
||||
/* acc += a1 * y[n-1] + a2 * y[n-2] */
|
||||
acc = __SMLALD(a1, state_out, acc);
|
||||
|
||||
/* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
|
||||
/* Calc lower part of acc */
|
||||
acc_l = acc & 0xffffffff;
|
||||
|
||||
/* Calc upper part of acc */
|
||||
acc_h = (acc >> 32) & 0xffffffff;
|
||||
|
||||
/* Apply shift for lower part of acc and upper part of acc */
|
||||
out = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||
|
||||
out = __SSAT(out, 16);
|
||||
|
||||
/* Store the output in the destination buffer. */
|
||||
*pOut++ = (q15_t) out;
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
||||
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
state_in = __PKHBT(in, state_in, 16);
|
||||
state_out = __PKHBT(out, state_out, 16);
|
||||
|
||||
#else
|
||||
|
||||
state_in = __PKHBT(state_in >> 16, in, 16);
|
||||
state_out = __PKHBT(state_out >> 16, out, 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
}
|
||||
|
||||
/* The first stage goes from the input wire to the output wire. */
|
||||
/* Subsequent numStages occur in-place in the output wire */
|
||||
pIn = pDst;
|
||||
|
||||
/* Reset the output pointer */
|
||||
pOut = pDst;
|
||||
|
||||
/* Store the updated state variables back into the state array */
|
||||
*__SIMD32(pState)++ = state_in;
|
||||
*__SIMD32(pState)++ = state_out;
|
||||
|
||||
|
||||
/* Decrement the loop counter */
|
||||
stage--;
|
||||
|
||||
} while(stage > 0u);
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
q15_t *pIn = pSrc; /* Source pointer */
|
||||
q15_t *pOut = pDst; /* Destination pointer */
|
||||
q15_t b0, b1, b2, a1, a2; /* Filter coefficients */
|
||||
q15_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */
|
||||
q15_t Xn; /* temporary input */
|
||||
q63_t acc; /* Accumulator */
|
||||
int32_t shift = (15 - (int32_t) S->postShift); /* Post shift */
|
||||
q15_t *pState = S->pState; /* State pointer */
|
||||
q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
|
||||
uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */
|
||||
|
||||
do
|
||||
{
|
||||
/* Reading the coefficients */
|
||||
b0 = *pCoeffs++;
|
||||
b1 = *pCoeffs++;
|
||||
b2 = *pCoeffs++;
|
||||
a1 = *pCoeffs++;
|
||||
a2 = *pCoeffs++;
|
||||
|
||||
/* Reading the state values */
|
||||
Xn1 = pState[0];
|
||||
Xn2 = pState[1];
|
||||
Yn1 = pState[2];
|
||||
Yn2 = pState[3];
|
||||
|
||||
/* The variables acc holds the output value that is computed:
|
||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||
*/
|
||||
|
||||
sample = blockSize;
|
||||
|
||||
while(sample > 0u)
|
||||
{
|
||||
/* Read the input */
|
||||
Xn = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
/* acc = b0 * x[n] */
|
||||
acc = (q31_t) b0 *Xn;
|
||||
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc += (q31_t) b1 *Xn1;
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc += (q31_t) b2 *Xn2;
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc += (q31_t) a1 *Yn1;
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc += (q31_t) a2 *Yn2;
|
||||
|
||||
/* The result is converted to 1.31 */
|
||||
acc = __SSAT((acc >> shift), 16);
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
Xn2 = Xn1;
|
||||
Xn1 = Xn;
|
||||
Yn2 = Yn1;
|
||||
Yn1 = (q15_t) acc;
|
||||
|
||||
/* Store the output in the destination buffer. */
|
||||
*pOut++ = (q15_t) acc;
|
||||
|
||||
/* decrement the loop counter */
|
||||
sample--;
|
||||
}
|
||||
|
||||
/* The first stage goes from the input buffer to the output buffer. */
|
||||
/* Subsequent stages occur in-place in the output buffer */
|
||||
pIn = pDst;
|
||||
|
||||
/* Reset to destination pointer */
|
||||
pOut = pDst;
|
||||
|
||||
/* Store the updated state variables back into the pState array */
|
||||
*pState++ = Xn1;
|
||||
*pState++ = Xn2;
|
||||
*pState++ = Yn1;
|
||||
*pState++ = Yn2;
|
||||
|
||||
} while(--stage);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @} end of BiquadCascadeDF1 group
|
||||
*/
|
|
@ -1,400 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_biquad_cascade_df1_q31.c
|
||||
*
|
||||
* Description: Processing function for the
|
||||
* Q31 Biquad cascade filter
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.5 2010/04/26
|
||||
* incorporated review comments and updated with latest CMSIS layer
|
||||
*
|
||||
* Version 0.0.3 2010/03/10
|
||||
* Initial version
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFilters
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BiquadCascadeDF1
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Processing function for the Q31 Biquad cascade filter.
|
||||
* @param[in] *S points to an instance of the Q31 Biquad cascade structure.
|
||||
* @param[in] *pSrc points to the block of input data.
|
||||
* @param[out] *pDst points to the block of output data.
|
||||
* @param[in] blockSize number of samples to process per call.
|
||||
* @return none.
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function is implemented using an internal 64-bit accumulator.
|
||||
* The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
|
||||
* Thus, if the accumulator result overflows it wraps around rather than clip.
|
||||
* In order to avoid overflows completely the input signal must be scaled down by 2 bits and lie in the range [-0.25 +0.25).
|
||||
* After all 5 multiply-accumulates are performed, the 2.62 accumulator is shifted by <code>postShift</code> bits and the result truncated to
|
||||
* 1.31 format by discarding the low 32 bits.
|
||||
*
|
||||
* \par
|
||||
* Refer to the function <code>arm_biquad_cascade_df1_fast_q31()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4.
|
||||
*/
|
||||
|
||||
void arm_biquad_cascade_df1_q31(
|
||||
const arm_biquad_casd_df1_inst_q31 * S,
|
||||
q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
q63_t acc; /* accumulator */
|
||||
uint32_t uShift = ((uint32_t) S->postShift + 1u);
|
||||
uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */
|
||||
q31_t *pIn = pSrc; /* input pointer initialization */
|
||||
q31_t *pOut = pDst; /* output pointer initialization */
|
||||
q31_t *pState = S->pState; /* pState pointer initialization */
|
||||
q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */
|
||||
q31_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */
|
||||
q31_t b0, b1, b2, a1, a2; /* Filter coefficients */
|
||||
q31_t Xn; /* temporary input */
|
||||
uint32_t sample, stage = S->numStages; /* loop counters */
|
||||
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
q31_t acc_l, acc_h; /* temporary output variables */
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
do
|
||||
{
|
||||
/* Reading the coefficients */
|
||||
b0 = *pCoeffs++;
|
||||
b1 = *pCoeffs++;
|
||||
b2 = *pCoeffs++;
|
||||
a1 = *pCoeffs++;
|
||||
a2 = *pCoeffs++;
|
||||
|
||||
/* Reading the state values */
|
||||
Xn1 = pState[0];
|
||||
Xn2 = pState[1];
|
||||
Yn1 = pState[2];
|
||||
Yn2 = pState[3];
|
||||
|
||||
/* Apply loop unrolling and compute 4 output values simultaneously. */
|
||||
/* The variable acc hold output values that are being computed:
|
||||
*
|
||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||
*/
|
||||
|
||||
sample = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(sample > 0u)
|
||||
{
|
||||
/* Read the input */
|
||||
Xn = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
|
||||
/* acc = b0 * x[n] */
|
||||
acc = (q63_t) b0 *Xn;
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc += (q63_t) b1 *Xn1;
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc += (q63_t) b2 *Xn2;
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc += (q63_t) a1 *Yn1;
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc += (q63_t) a2 *Yn2;
|
||||
|
||||
/* The result is converted to 1.31 , Yn2 variable is reused */
|
||||
|
||||
/* Calc lower part of acc */
|
||||
acc_l = acc & 0xffffffff;
|
||||
|
||||
/* Calc upper part of acc */
|
||||
acc_h = (acc >> 32) & 0xffffffff;
|
||||
|
||||
/* Apply shift for lower part of acc and upper part of acc */
|
||||
Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||
|
||||
/* Store the output in the destination buffer. */
|
||||
*pOut++ = Yn2;
|
||||
|
||||
/* Read the second input */
|
||||
Xn2 = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
|
||||
/* acc = b0 * x[n] */
|
||||
acc = (q63_t) b0 *Xn2;
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc += (q63_t) b1 *Xn;
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc += (q63_t) b2 *Xn1;
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc += (q63_t) a1 *Yn2;
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc += (q63_t) a2 *Yn1;
|
||||
|
||||
|
||||
/* The result is converted to 1.31, Yn1 variable is reused */
|
||||
|
||||
/* Calc lower part of acc */
|
||||
acc_l = acc & 0xffffffff;
|
||||
|
||||
/* Calc upper part of acc */
|
||||
acc_h = (acc >> 32) & 0xffffffff;
|
||||
|
||||
|
||||
/* Apply shift for lower part of acc and upper part of acc */
|
||||
Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||
|
||||
/* Store the output in the destination buffer. */
|
||||
*pOut++ = Yn1;
|
||||
|
||||
/* Read the third input */
|
||||
Xn1 = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
|
||||
/* acc = b0 * x[n] */
|
||||
acc = (q63_t) b0 *Xn1;
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc += (q63_t) b1 *Xn2;
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc += (q63_t) b2 *Xn;
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc += (q63_t) a1 *Yn1;
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc += (q63_t) a2 *Yn2;
|
||||
|
||||
/* The result is converted to 1.31, Yn2 variable is reused */
|
||||
/* Calc lower part of acc */
|
||||
acc_l = acc & 0xffffffff;
|
||||
|
||||
/* Calc upper part of acc */
|
||||
acc_h = (acc >> 32) & 0xffffffff;
|
||||
|
||||
|
||||
/* Apply shift for lower part of acc and upper part of acc */
|
||||
Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||
|
||||
/* Store the output in the destination buffer. */
|
||||
*pOut++ = Yn2;
|
||||
|
||||
/* Read the forth input */
|
||||
Xn = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
|
||||
/* acc = b0 * x[n] */
|
||||
acc = (q63_t) b0 *Xn;
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc += (q63_t) b1 *Xn1;
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc += (q63_t) b2 *Xn2;
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc += (q63_t) a1 *Yn2;
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc += (q63_t) a2 *Yn1;
|
||||
|
||||
/* The result is converted to 1.31, Yn1 variable is reused */
|
||||
/* Calc lower part of acc */
|
||||
acc_l = acc & 0xffffffff;
|
||||
|
||||
/* Calc upper part of acc */
|
||||
acc_h = (acc >> 32) & 0xffffffff;
|
||||
|
||||
/* Apply shift for lower part of acc and upper part of acc */
|
||||
Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
Xn2 = Xn1;
|
||||
Xn1 = Xn;
|
||||
|
||||
/* Store the output in the destination buffer. */
|
||||
*pOut++ = Yn1;
|
||||
|
||||
/* decrement the loop counter */
|
||||
sample--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
sample = (blockSize & 0x3u);
|
||||
|
||||
while(sample > 0u)
|
||||
{
|
||||
/* Read the input */
|
||||
Xn = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
|
||||
/* acc = b0 * x[n] */
|
||||
acc = (q63_t) b0 *Xn;
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc += (q63_t) b1 *Xn1;
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc += (q63_t) b2 *Xn2;
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc += (q63_t) a1 *Yn1;
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc += (q63_t) a2 *Yn2;
|
||||
|
||||
/* The result is converted to 1.31 */
|
||||
acc = acc >> lShift;
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
Xn2 = Xn1;
|
||||
Xn1 = Xn;
|
||||
Yn2 = Yn1;
|
||||
Yn1 = (q31_t) acc;
|
||||
|
||||
/* Store the output in the destination buffer. */
|
||||
*pOut++ = (q31_t) acc;
|
||||
|
||||
/* decrement the loop counter */
|
||||
sample--;
|
||||
}
|
||||
|
||||
/* The first stage goes from the input buffer to the output buffer. */
|
||||
/* Subsequent stages occur in-place in the output buffer */
|
||||
pIn = pDst;
|
||||
|
||||
/* Reset to destination pointer */
|
||||
pOut = pDst;
|
||||
|
||||
/* Store the updated state variables back into the pState array */
|
||||
*pState++ = Xn1;
|
||||
*pState++ = Xn2;
|
||||
*pState++ = Yn1;
|
||||
*pState++ = Yn2;
|
||||
|
||||
} while(--stage);
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
do
|
||||
{
|
||||
/* Reading the coefficients */
|
||||
b0 = *pCoeffs++;
|
||||
b1 = *pCoeffs++;
|
||||
b2 = *pCoeffs++;
|
||||
a1 = *pCoeffs++;
|
||||
a2 = *pCoeffs++;
|
||||
|
||||
/* Reading the state values */
|
||||
Xn1 = pState[0];
|
||||
Xn2 = pState[1];
|
||||
Yn1 = pState[2];
|
||||
Yn2 = pState[3];
|
||||
|
||||
/* The variables acc holds the output value that is computed:
|
||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||
*/
|
||||
|
||||
sample = blockSize;
|
||||
|
||||
while(sample > 0u)
|
||||
{
|
||||
/* Read the input */
|
||||
Xn = *pIn++;
|
||||
|
||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||
/* acc = b0 * x[n] */
|
||||
acc = (q63_t) b0 *Xn;
|
||||
|
||||
/* acc += b1 * x[n-1] */
|
||||
acc += (q63_t) b1 *Xn1;
|
||||
/* acc += b[2] * x[n-2] */
|
||||
acc += (q63_t) b2 *Xn2;
|
||||
/* acc += a1 * y[n-1] */
|
||||
acc += (q63_t) a1 *Yn1;
|
||||
/* acc += a2 * y[n-2] */
|
||||
acc += (q63_t) a2 *Yn2;
|
||||
|
||||
/* The result is converted to 1.31 */
|
||||
acc = acc >> lShift;
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* The states should be updated as: */
|
||||
/* Xn2 = Xn1 */
|
||||
/* Xn1 = Xn */
|
||||
/* Yn2 = Yn1 */
|
||||
/* Yn1 = acc */
|
||||
Xn2 = Xn1;
|
||||
Xn1 = Xn;
|
||||
Yn2 = Yn1;
|
||||
Yn1 = (q31_t) acc;
|
||||
|
||||
/* Store the output in the destination buffer. */
|
||||
*pOut++ = (q31_t) acc;
|
||||
|
||||
/* decrement the loop counter */
|
||||
sample--;
|
||||
}
|
||||
|
||||
/* The first stage goes from the input buffer to the output buffer. */
|
||||
/* Subsequent stages occur in-place in the output buffer */
|
||||
pIn = pDst;
|
||||
|
||||
/* Reset to destination pointer */
|
||||
pOut = pDst;
|
||||
|
||||
/* Store the updated state variables back into the pState array */
|
||||
*pState++ = Xn1;
|
||||
*pState++ = Xn2;
|
||||
*pState++ = Yn1;
|
||||
*pState++ = Yn2;
|
||||
|
||||
} while(--stage);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BiquadCascadeDF1 group
|
||||
*/
|
|
@ -1,377 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_biquad_cascade_df2T_f32.c
|
||||
*
|
||||
* Description: Processing function for the floating-point transposed
|
||||
* direct form II Biquad cascade filter.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFilters
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup BiquadCascadeDF2T Biquad Cascade IIR Filters Using a Direct Form II Transposed Structure
|
||||
*
|
||||
* This set of functions implements arbitrary order recursive (IIR) filters using a transposed direct form II structure.
|
||||
* The filters are implemented as a cascade of second order Biquad sections.
|
||||
* These functions provide a slight memory savings as compared to the direct form I Biquad filter functions.
|
||||
* Only floating-point data is supported.
|
||||
*
|
||||
* This function operate on blocks of input and output data and each call to the function
|
||||
* processes <code>blockSize</code> samples through the filter.
|
||||
* <code>pSrc</code> points to the array of input data and
|
||||
* <code>pDst</code> points to the array of output data.
|
||||
* Both arrays contain <code>blockSize</code> values.
|
||||
*
|
||||
* \par Algorithm
|
||||
* Each Biquad stage implements a second order filter using the difference equation:
|
||||
* <pre>
|
||||
* y[n] = b0 * x[n] + d1
|
||||
* d1 = b1 * x[n] + a1 * y[n] + d2
|
||||
* d2 = b2 * x[n] + a2 * y[n]
|
||||
* </pre>
|
||||
* where d1 and d2 represent the two state values.
|
||||
*
|
||||
* \par
|
||||
* A Biquad filter using a transposed Direct Form II structure is shown below.
|
||||
* \image html BiquadDF2Transposed.gif "Single transposed Direct Form II Biquad"
|
||||
* Coefficients <code>b0, b1, and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.
|
||||
* Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.
|
||||
* Pay careful attention to the sign of the feedback coefficients.
|
||||
* Some design tools flip the sign of the feedback coefficients:
|
||||
* <pre>
|
||||
* y[n] = b0 * x[n] + d1;
|
||||
* d1 = b1 * x[n] - a1 * y[n] + d2;
|
||||
* d2 = b2 * x[n] - a2 * y[n];
|
||||
* </pre>
|
||||
* In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.
|
||||
*
|
||||
* \par
|
||||
* Higher order filters are realized as a cascade of second order sections.
|
||||
* <code>numStages</code> refers to the number of second order stages used.
|
||||
* For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.
|
||||
* A 9th order filter would be realized with <code>numStages=5</code> second order stages with the
|
||||
* coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).
|
||||
*
|
||||
* \par
|
||||
* <code>pState</code> points to the state variable array.
|
||||
* Each Biquad stage has 2 state variables <code>d1</code> and <code>d2</code>.
|
||||
* The state variables are arranged in the <code>pState</code> array as:
|
||||
* <pre>
|
||||
* {d11, d12, d21, d22, ...}
|
||||
* </pre>
|
||||
* where <code>d1x</code> refers to the state variables for the first Biquad and
|
||||
* <code>d2x</code> refers to the state variables for the second Biquad.
|
||||
* The state array has a total length of <code>2*numStages</code> values.
|
||||
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
||||
*
|
||||
* \par
|
||||
* The CMSIS library contains Biquad filters in both Direct Form I and transposed Direct Form II.
|
||||
* The advantage of the Direct Form I structure is that it is numerically more robust for fixed-point data types.
|
||||
* That is why the Direct Form I structure supports Q15 and Q31 data types.
|
||||
* The transposed Direct Form II structure, on the other hand, requires a wide dynamic range for the state variables <code>d1</code> and <code>d2</code>.
|
||||
* Because of this, the CMSIS library only has a floating-point version of the Direct Form II Biquad.
|
||||
* The advantage of the Direct Form II Biquad is that it requires half the number of state variables, 2 rather than 4, per Biquad stage.
|
||||
*
|
||||
* \par Instance Structure
|
||||
* The coefficients and state variables for a filter are stored together in an instance data structure.
|
||||
* A separate instance structure must be defined for each filter.
|
||||
* Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
|
||||
*
|
||||
* \par Init Functions
|
||||
* There is also an associated initialization function.
|
||||
* The initialization function performs following operations:
|
||||
* - Sets the values of the internal structure fields.
|
||||
* - Zeros out the values in the state buffer.
|
||||
*
|
||||
* \par
|
||||
* Use of the initialization function is optional.
|
||||
* However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
|
||||
* To place an instance structure into a const data section, the instance structure must be manually initialized.
|
||||
* Set the values in the state buffer to zeros before static initialization.
|
||||
* For example, to statically initialize the instance structure use
|
||||
* <pre>
|
||||
* arm_biquad_cascade_df2T_instance_f32 S1 = {numStages, pState, pCoeffs};
|
||||
* </pre>
|
||||
* where <code>numStages</code> is the number of Biquad stages in the filter; <code>pState</code> is the address of the state buffer.
|
||||
* <code>pCoeffs</code> is the address of the coefficient buffer;
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BiquadCascadeDF2T
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
|
||||
* @param[in] *S points to an instance of the filter data structure.
|
||||
* @param[in] *pSrc points to the block of input data.
|
||||
* @param[out] *pDst points to the block of output data
|
||||
* @param[in] blockSize number of samples to process.
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_biquad_cascade_df2T_f32(
|
||||
const arm_biquad_cascade_df2T_instance_f32 * S,
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
|
||||
float32_t *pIn = pSrc; /* source pointer */
|
||||
float32_t *pOut = pDst; /* destination pointer */
|
||||
float32_t *pState = S->pState; /* State pointer */
|
||||
float32_t *pCoeffs = S->pCoeffs; /* coefficient pointer */
|
||||
float32_t acc0; /* accumulator */
|
||||
float32_t b0, b1, b2, a1, a2; /* Filter coefficients */
|
||||
float32_t Xn; /* temporary input */
|
||||
float32_t d1, d2; /* state variables */
|
||||
uint32_t sample, stage = S->numStages; /* loop counters */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
float32_t Xn1, Xn2; /* Input State variables */
|
||||
float32_t acc1; /* accumulator */
|
||||
|
||||
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
do
|
||||
{
|
||||
/* Reading the coefficients */
|
||||
b0 = *pCoeffs++;
|
||||
b1 = *pCoeffs++;
|
||||
b2 = *pCoeffs++;
|
||||
a1 = *pCoeffs++;
|
||||
a2 = *pCoeffs++;
|
||||
|
||||
/*Reading the state values */
|
||||
d1 = pState[0];
|
||||
d2 = pState[1];
|
||||
|
||||
/* Apply loop unrolling and compute 4 output values simultaneously. */
|
||||
sample = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(sample > 0u)
|
||||
{
|
||||
|
||||
/* y[n] = b0 * x[n] + d1 */
|
||||
/* d1 = b1 * x[n] + a1 * y[n] + d2 */
|
||||
/* d2 = b2 * x[n] + a2 * y[n] */
|
||||
|
||||
/* Read the first input */
|
||||
Xn1 = *pIn++;
|
||||
|
||||
/* y[n] = b0 * x[n] + d1 */
|
||||
acc0 = (b0 * Xn1) + d1;
|
||||
|
||||
/* d1 = b1 * x[n] + d2 */
|
||||
d1 = (b1 * Xn1) + d2;
|
||||
|
||||
/* d2 = b2 * x[n] */
|
||||
d2 = (b2 * Xn1);
|
||||
|
||||
/* Read the second input */
|
||||
Xn2 = *pIn++;
|
||||
|
||||
/* d1 = b1 * x[n] + a1 * y[n] */
|
||||
d1 = (a1 * acc0) + d1;
|
||||
|
||||
/* Store the result in the accumulator in the destination buffer. */
|
||||
*pOut++ = acc0;
|
||||
|
||||
d2 = (a2 * acc0) + d2;
|
||||
|
||||
/* y[n] = b0 * x[n] + d1 */
|
||||
acc1 = (b0 * Xn2) + d1;
|
||||
|
||||
/* Read the third input */
|
||||
Xn1 = *pIn++;
|
||||
|
||||
d1 = (b1 * Xn2) + d2;
|
||||
|
||||
d2 = (b2 * Xn2);
|
||||
|
||||
/* Store the result in the accumulator in the destination buffer. */
|
||||
*pOut++ = acc1;
|
||||
|
||||
d1 = (a1 * acc1) + d1;
|
||||
|
||||
d2 = (a2 * acc1) + d2;
|
||||
|
||||
/* y[n] = b0 * x[n] + d1 */
|
||||
acc0 = (b0 * Xn1) + d1;
|
||||
|
||||
d1 = (b1 * Xn1) + d2;
|
||||
|
||||
d2 = (b2 * Xn1);
|
||||
|
||||
/* Read the fourth input */
|
||||
Xn2 = *pIn++;
|
||||
|
||||
d1 = (a1 * acc0) + d1;
|
||||
|
||||
/* Store the result in the accumulator in the destination buffer. */
|
||||
*pOut++ = acc0;
|
||||
|
||||
d2 = (a2 * acc0) + d2;
|
||||
|
||||
/* y[n] = b0 * x[n] + d1 */
|
||||
acc1 = (b0 * Xn2) + d1;
|
||||
|
||||
d1 = (b1 * Xn2) + d2;
|
||||
|
||||
d2 = (b2 * Xn2);
|
||||
|
||||
/* Store the result in the accumulator in the destination buffer. */
|
||||
*pOut++ = acc1;
|
||||
|
||||
d1 = (a1 * acc1) + d1;
|
||||
|
||||
d2 = (a2 * acc1) + d2;
|
||||
|
||||
/* decrement the loop counter */
|
||||
sample--;
|
||||
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
sample = blockSize & 0x3u;
|
||||
|
||||
while(sample > 0u)
|
||||
{
|
||||
/* Read the input */
|
||||
Xn = *pIn++;
|
||||
|
||||
/* y[n] = b0 * x[n] + d1 */
|
||||
acc0 = (b0 * Xn) + d1;
|
||||
|
||||
/* Store the result in the accumulator in the destination buffer. */
|
||||
*pOut++ = acc0;
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* d1 = b1 * x[n] + a1 * y[n] + d2 */
|
||||
d1 = ((b1 * Xn) + (a1 * acc0)) + d2;
|
||||
|
||||
/* d2 = b2 * x[n] + a2 * y[n] */
|
||||
d2 = (b2 * Xn) + (a2 * acc0);
|
||||
|
||||
/* decrement the loop counter */
|
||||
sample--;
|
||||
}
|
||||
|
||||
/* Store the updated state variables back into the state array */
|
||||
*pState++ = d1;
|
||||
*pState++ = d2;
|
||||
|
||||
/* The current stage input is given as the output to the next stage */
|
||||
pIn = pDst;
|
||||
|
||||
/*Reset the output working pointer */
|
||||
pOut = pDst;
|
||||
|
||||
/* decrement the loop counter */
|
||||
stage--;
|
||||
|
||||
} while(stage > 0u);
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
do
|
||||
{
|
||||
/* Reading the coefficients */
|
||||
b0 = *pCoeffs++;
|
||||
b1 = *pCoeffs++;
|
||||
b2 = *pCoeffs++;
|
||||
a1 = *pCoeffs++;
|
||||
a2 = *pCoeffs++;
|
||||
|
||||
/*Reading the state values */
|
||||
d1 = pState[0];
|
||||
d2 = pState[1];
|
||||
|
||||
|
||||
sample = blockSize;
|
||||
|
||||
while(sample > 0u)
|
||||
{
|
||||
/* Read the input */
|
||||
Xn = *pIn++;
|
||||
|
||||
/* y[n] = b0 * x[n] + d1 */
|
||||
acc0 = (b0 * Xn) + d1;
|
||||
|
||||
/* Store the result in the accumulator in the destination buffer. */
|
||||
*pOut++ = acc0;
|
||||
|
||||
/* Every time after the output is computed state should be updated. */
|
||||
/* d1 = b1 * x[n] + a1 * y[n] + d2 */
|
||||
d1 = ((b1 * Xn) + (a1 * acc0)) + d2;
|
||||
|
||||
/* d2 = b2 * x[n] + a2 * y[n] */
|
||||
d2 = (b2 * Xn) + (a2 * acc0);
|
||||
|
||||
/* decrement the loop counter */
|
||||
sample--;
|
||||
}
|
||||
|
||||
/* Store the updated state variables back into the state array */
|
||||
*pState++ = d1;
|
||||
*pState++ = d2;
|
||||
|
||||
/* The current stage input is given as the output to the next stage */
|
||||
pIn = pDst;
|
||||
|
||||
/*Reset the output working pointer */
|
||||
pOut = pDst;
|
||||
|
||||
/* decrement the loop counter */
|
||||
stage--;
|
||||
|
||||
} while(stage > 0u);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @} end of BiquadCascadeDF2T group
|
||||
*/
|
|
@ -1,97 +0,0 @@
|
|||
/*-----------------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_biquad_cascade_df2T_init_f32.c
|
||||
*
|
||||
* Description: Initialization function for the floating-point transposed
|
||||
* direct form II Biquad cascade filter.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* ---------------------------------------------------------------------------*/
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFilters
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup BiquadCascadeDF2T
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
|
||||
* @param[in,out] *S points to an instance of the filter data structure.
|
||||
* @param[in] numStages number of 2nd order stages in the filter.
|
||||
* @param[in] *pCoeffs points to the filter coefficients.
|
||||
* @param[in] *pState points to the state buffer.
|
||||
* @return none
|
||||
*
|
||||
* <b>Coefficient and State Ordering:</b>
|
||||
* \par
|
||||
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
||||
* <pre>
|
||||
* {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
|
||||
* </pre>
|
||||
*
|
||||
* \par
|
||||
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
||||
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
||||
* and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
|
||||
*
|
||||
* \par
|
||||
* The <code>pState</code> is a pointer to state array.
|
||||
* Each Biquad stage has 2 state variables <code>d1,</code> and <code>d2</code>.
|
||||
* The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
|
||||
* The state array has a total length of <code>2*numStages</code> values.
|
||||
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
||||
*/
|
||||
|
||||
void arm_biquad_cascade_df2T_init_f32(
|
||||
arm_biquad_cascade_df2T_instance_f32 * S,
|
||||
uint8_t numStages,
|
||||
float32_t * pCoeffs,
|
||||
float32_t * pState)
|
||||
{
|
||||
/* Assign filter stages */
|
||||
S->numStages = numStages;
|
||||
|
||||
/* Assign coefficient pointer */
|
||||
S->pCoeffs = pCoeffs;
|
||||
|
||||
/* Clear state buffer and size is always 2 * numStages */
|
||||
memset(pState, 0, (2u * (uint32_t) numStages) * sizeof(float32_t));
|
||||
|
||||
/* Assign state pointer */
|
||||
S->pState = pState;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BiquadCascadeDF2T group
|
||||
*/
|
|
@ -1,646 +0,0 @@
|
|||
/* ----------------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_conv_f32.c
|
||||
*
|
||||
* Description: Convolution of floating-point sequences.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.11 2011/10/18
|
||||
* Bug Fix in conv, correlation, partial convolution.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
*
|
||||
* -------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFilters
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup Conv Convolution
|
||||
*
|
||||
* Convolution is a mathematical operation that operates on two finite length vectors to generate a finite length output vector.
|
||||
* Convolution is similar to correlation and is frequently used in filtering and data analysis.
|
||||
* The CMSIS DSP library contains functions for convolving Q7, Q15, Q31, and floating-point data types.
|
||||
* The library also provides fast versions of the Q15 and Q31 functions on Cortex-M4 and Cortex-M3.
|
||||
*
|
||||
* \par Algorithm
|
||||
* Let <code>a[n]</code> and <code>b[n]</code> be sequences of length <code>srcALen</code> and <code>srcBLen</code> samples respectively.
|
||||
* Then the convolution
|
||||
*
|
||||
* <pre>
|
||||
* c[n] = a[n] * b[n]
|
||||
* </pre>
|
||||
*
|
||||
* \par
|
||||
* is defined as
|
||||
* \image html ConvolutionEquation.gif
|
||||
* \par
|
||||
* Note that <code>c[n]</code> is of length <code>srcALen + srcBLen - 1</code> and is defined over the interval <code>n=0, 1, 2, ..., srcALen + srcBLen - 2</code>.
|
||||
* <code>pSrcA</code> points to the first input vector of length <code>srcALen</code> and
|
||||
* <code>pSrcB</code> points to the second input vector of length <code>srcBLen</code>.
|
||||
* The output result is written to <code>pDst</code> and the calling function must allocate <code>srcALen+srcBLen-1</code> words for the result.
|
||||
*
|
||||
* \par
|
||||
* Conceptually, when two signals <code>a[n]</code> and <code>b[n]</code> are convolved,
|
||||
* the signal <code>b[n]</code> slides over <code>a[n]</code>.
|
||||
* For each offset \c n, the overlapping portions of a[n] and b[n] are multiplied and summed together.
|
||||
*
|
||||
* \par
|
||||
* Note that convolution is a commutative operation:
|
||||
*
|
||||
* <pre>
|
||||
* a[n] * b[n] = b[n] * a[n].
|
||||
* </pre>
|
||||
*
|
||||
* \par
|
||||
* This means that switching the A and B arguments to the convolution functions has no effect.
|
||||
*
|
||||
* <b>Fixed-Point Behavior</b>
|
||||
*
|
||||
* \par
|
||||
* Convolution requires summing up a large number of intermediate products.
|
||||
* As such, the Q7, Q15, and Q31 functions run a risk of overflow and saturation.
|
||||
* Refer to the function specific documentation below for further details of the particular algorithm used.
|
||||
*
|
||||
*
|
||||
* <b>Fast Versions</b>
|
||||
*
|
||||
* \par
|
||||
* Fast versions are supported for Q31 and Q15. Cycles for Fast versions are less compared to Q31 and Q15 of conv and the design requires
|
||||
* the input signals should be scaled down to avoid intermediate overflows.
|
||||
*
|
||||
*
|
||||
* <b>Opt Versions</b>
|
||||
*
|
||||
* \par
|
||||
* Opt versions are supported for Q15 and Q7. Design uses internal scratch buffer for getting good optimisation.
|
||||
* These versions are optimised in cycles and consumes more memory(Scratch memory) compared to Q15 and Q7 versions
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup Conv
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Convolution of floating-point sequences.
|
||||
* @param[in] *pSrcA points to the first input sequence.
|
||||
* @param[in] srcALen length of the first input sequence.
|
||||
* @param[in] *pSrcB points to the second input sequence.
|
||||
* @param[in] srcBLen length of the second input sequence.
|
||||
* @param[out] *pDst points to the location where the output result is written. Length srcALen+srcBLen-1.
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_conv_f32(
|
||||
float32_t * pSrcA,
|
||||
uint32_t srcALen,
|
||||
float32_t * pSrcB,
|
||||
uint32_t srcBLen,
|
||||
float32_t * pDst)
|
||||
{
|
||||
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
float32_t *pIn1; /* inputA pointer */
|
||||
float32_t *pIn2; /* inputB pointer */
|
||||
float32_t *pOut = pDst; /* output pointer */
|
||||
float32_t *px; /* Intermediate inputA pointer */
|
||||
float32_t *py; /* Intermediate inputB pointer */
|
||||
float32_t *pSrc1, *pSrc2; /* Intermediate pointers */
|
||||
float32_t sum, acc0, acc1, acc2, acc3; /* Accumulator */
|
||||
float32_t x0, x1, x2, x3, c0; /* Temporary variables to hold state and coefficient values */
|
||||
uint32_t j, k, count, blkCnt, blockSize1, blockSize2, blockSize3; /* loop counters */
|
||||
|
||||
/* The algorithm implementation is based on the lengths of the inputs. */
|
||||
/* srcB is always made to slide across srcA. */
|
||||
/* So srcBLen is always considered as shorter or equal to srcALen */
|
||||
if(srcALen >= srcBLen)
|
||||
{
|
||||
/* Initialization of inputA pointer */
|
||||
pIn1 = pSrcA;
|
||||
|
||||
/* Initialization of inputB pointer */
|
||||
pIn2 = pSrcB;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Initialization of inputA pointer */
|
||||
pIn1 = pSrcB;
|
||||
|
||||
/* Initialization of inputB pointer */
|
||||
pIn2 = pSrcA;
|
||||
|
||||
/* srcBLen is always considered as shorter or equal to srcALen */
|
||||
j = srcBLen;
|
||||
srcBLen = srcALen;
|
||||
srcALen = j;
|
||||
}
|
||||
|
||||
/* conv(x,y) at n = x[n] * y[0] + x[n-1] * y[1] + x[n-2] * y[2] + ...+ x[n-N+1] * y[N -1] */
|
||||
/* The function is internally
|
||||
* divided into three stages according to the number of multiplications that has to be
|
||||
* taken place between inputA samples and inputB samples. In the first stage of the
|
||||
* algorithm, the multiplications increase by one for every iteration.
|
||||
* In the second stage of the algorithm, srcBLen number of multiplications are done.
|
||||
* In the third stage of the algorithm, the multiplications decrease by one
|
||||
* for every iteration. */
|
||||
|
||||
/* The algorithm is implemented in three stages.
|
||||
The loop counters of each stage is initiated here. */
|
||||
blockSize1 = srcBLen - 1u;
|
||||
blockSize2 = srcALen - (srcBLen - 1u);
|
||||
blockSize3 = blockSize1;
|
||||
|
||||
/* --------------------------
|
||||
* initializations of stage1
|
||||
* -------------------------*/
|
||||
|
||||
/* sum = x[0] * y[0]
|
||||
* sum = x[0] * y[1] + x[1] * y[0]
|
||||
* ....
|
||||
* sum = x[0] * y[srcBlen - 1] + x[1] * y[srcBlen - 2] +...+ x[srcBLen - 1] * y[0]
|
||||
*/
|
||||
|
||||
/* In this stage the MAC operations are increased by 1 for every iteration.
|
||||
The count variable holds the number of MAC operations performed */
|
||||
count = 1u;
|
||||
|
||||
/* Working pointer of inputA */
|
||||
px = pIn1;
|
||||
|
||||
/* Working pointer of inputB */
|
||||
py = pIn2;
|
||||
|
||||
|
||||
/* ------------------------
|
||||
* Stage1 process
|
||||
* ----------------------*/
|
||||
|
||||
/* The first stage starts here */
|
||||
while(blockSize1 > 0u)
|
||||
{
|
||||
/* Accumulator is made zero for every iteration */
|
||||
sum = 0.0f;
|
||||
|
||||
/* Apply loop unrolling and compute 4 MACs simultaneously. */
|
||||
k = count >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
|
||||
** a second loop below computes MACs for the remaining 1 to 3 samples. */
|
||||
while(k > 0u)
|
||||
{
|
||||
/* x[0] * y[srcBLen - 1] */
|
||||
sum += *px++ * *py--;
|
||||
|
||||
/* x[1] * y[srcBLen - 2] */
|
||||
sum += *px++ * *py--;
|
||||
|
||||
/* x[2] * y[srcBLen - 3] */
|
||||
sum += *px++ * *py--;
|
||||
|
||||
/* x[3] * y[srcBLen - 4] */
|
||||
sum += *px++ * *py--;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
k--;
|
||||
}
|
||||
|
||||
/* If the count is not a multiple of 4, compute any remaining MACs here.
|
||||
** No loop unrolling is used. */
|
||||
k = count % 0x4u;
|
||||
|
||||
while(k > 0u)
|
||||
{
|
||||
/* Perform the multiply-accumulate */
|
||||
sum += *px++ * *py--;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
k--;
|
||||
}
|
||||
|
||||
/* Store the result in the accumulator in the destination buffer. */
|
||||
*pOut++ = sum;
|
||||
|
||||
/* Update the inputA and inputB pointers for next MAC calculation */
|
||||
py = pIn2 + count;
|
||||
px = pIn1;
|
||||
|
||||
/* Increment the MAC count */
|
||||
count++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blockSize1--;
|
||||
}
|
||||
|
||||
/* --------------------------
|
||||
* Initializations of stage2
|
||||
* ------------------------*/
|
||||
|
||||
/* sum = x[0] * y[srcBLen-1] + x[1] * y[srcBLen-2] +...+ x[srcBLen-1] * y[0]
|
||||
* sum = x[1] * y[srcBLen-1] + x[2] * y[srcBLen-2] +...+ x[srcBLen] * y[0]
|
||||
* ....
|
||||
* sum = x[srcALen-srcBLen-2] * y[srcBLen-1] + x[srcALen] * y[srcBLen-2] +...+ x[srcALen-1] * y[0]
|
||||
*/
|
||||
|
||||
/* Working pointer of inputA */
|
||||
px = pIn1;
|
||||
|
||||
/* Working pointer of inputB */
|
||||
pSrc2 = pIn2 + (srcBLen - 1u);
|
||||
py = pSrc2;
|
||||
|
||||
/* count is index by which the pointer pIn1 to be incremented */
|
||||
count = 0u;
|
||||
|
||||
/* -------------------
|
||||
* Stage2 process
|
||||
* ------------------*/
|
||||
|
||||
/* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
|
||||
* So, to loop unroll over blockSize2,
|
||||
* srcBLen should be greater than or equal to 4 */
|
||||
if(srcBLen >= 4u)
|
||||
{
|
||||
/* Loop unroll over blockSize2, by 4 */
|
||||
blkCnt = blockSize2 >> 2u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Set all accumulators to zero */
|
||||
acc0 = 0.0f;
|
||||
acc1 = 0.0f;
|
||||
acc2 = 0.0f;
|
||||
acc3 = 0.0f;
|
||||
|
||||
/* read x[0], x[1], x[2] samples */
|
||||
x0 = *(px++);
|
||||
x1 = *(px++);
|
||||
x2 = *(px++);
|
||||
|
||||
/* Apply loop unrolling and compute 4 MACs simultaneously. */
|
||||
k = srcBLen >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
|
||||
** a second loop below computes MACs for the remaining 1 to 3 samples. */
|
||||
do
|
||||
{
|
||||
/* Read y[srcBLen - 1] sample */
|
||||
c0 = *(py--);
|
||||
|
||||
/* Read x[3] sample */
|
||||
x3 = *(px);
|
||||
|
||||
/* Perform the multiply-accumulate */
|
||||
/* acc0 += x[0] * y[srcBLen - 1] */
|
||||
acc0 += x0 * c0;
|
||||
|
||||
/* acc1 += x[1] * y[srcBLen - 1] */
|
||||
acc1 += x1 * c0;
|
||||
|
||||
/* acc2 += x[2] * y[srcBLen - 1] */
|
||||
acc2 += x2 * c0;
|
||||
|
||||
/* acc3 += x[3] * y[srcBLen - 1] */
|
||||
acc3 += x3 * c0;
|
||||
|
||||
/* Read y[srcBLen - 2] sample */
|
||||
c0 = *(py--);
|
||||
|
||||
/* Read x[4] sample */
|
||||
x0 = *(px + 1u);
|
||||
|
||||
/* Perform the multiply-accumulate */
|
||||
/* acc0 += x[1] * y[srcBLen - 2] */
|
||||
acc0 += x1 * c0;
|
||||
/* acc1 += x[2] * y[srcBLen - 2] */
|
||||
acc1 += x2 * c0;
|
||||
/* acc2 += x[3] * y[srcBLen - 2] */
|
||||
acc2 += x3 * c0;
|
||||
/* acc3 += x[4] * y[srcBLen - 2] */
|
||||
acc3 += x0 * c0;
|
||||
|
||||
/* Read y[srcBLen - 3] sample */
|
||||
c0 = *(py--);
|
||||
|
||||
/* Read x[5] sample */
|
||||
x1 = *(px + 2u);
|
||||
|
||||
/* Perform the multiply-accumulates */
|
||||
/* acc0 += x[2] * y[srcBLen - 3] */
|
||||
acc0 += x2 * c0;
|
||||
/* acc1 += x[3] * y[srcBLen - 2] */
|
||||
acc1 += x3 * c0;
|
||||
/* acc2 += x[4] * y[srcBLen - 2] */
|
||||
acc2 += x0 * c0;
|
||||
/* acc3 += x[5] * y[srcBLen - 2] */
|
||||
acc3 += x1 * c0;
|
||||
|
||||
/* Read y[srcBLen - 4] sample */
|
||||
c0 = *(py--);
|
||||
|
||||
/* Read x[6] sample */
|
||||
x2 = *(px + 3u);
|
||||
px += 4u;
|
||||
|
||||
/* Perform the multiply-accumulates */
|
||||
/* acc0 += x[3] * y[srcBLen - 4] */
|
||||
acc0 += x3 * c0;
|
||||
/* acc1 += x[4] * y[srcBLen - 4] */
|
||||
acc1 += x0 * c0;
|
||||
/* acc2 += x[5] * y[srcBLen - 4] */
|
||||
acc2 += x1 * c0;
|
||||
/* acc3 += x[6] * y[srcBLen - 4] */
|
||||
acc3 += x2 * c0;
|
||||
|
||||
|
||||
} while(--k);
|
||||
|
||||
/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
|
||||
** No loop unrolling is used. */
|
||||
k = srcBLen % 0x4u;
|
||||
|
||||
while(k > 0u)
|
||||
{
|
||||
/* Read y[srcBLen - 5] sample */
|
||||
c0 = *(py--);
|
||||
|
||||
/* Read x[7] sample */
|
||||
x3 = *(px++);
|
||||
|
||||
/* Perform the multiply-accumulates */
|
||||
/* acc0 += x[4] * y[srcBLen - 5] */
|
||||
acc0 += x0 * c0;
|
||||
/* acc1 += x[5] * y[srcBLen - 5] */
|
||||
acc1 += x1 * c0;
|
||||
/* acc2 += x[6] * y[srcBLen - 5] */
|
||||
acc2 += x2 * c0;
|
||||
/* acc3 += x[7] * y[srcBLen - 5] */
|
||||
acc3 += x3 * c0;
|
||||
|
||||
/* Reuse the present samples for the next MAC */
|
||||
x0 = x1;
|
||||
x1 = x2;
|
||||
x2 = x3;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
k--;
|
||||
}
|
||||
|
||||
/* Store the result in the accumulator in the destination buffer. */
|
||||
*pOut++ = acc0;
|
||||
*pOut++ = acc1;
|
||||
*pOut++ = acc2;
|
||||
*pOut++ = acc3;
|
||||
|
||||
/* Increment the pointer pIn1 index, count by 4 */
|
||||
count += 4u;
|
||||
|
||||
/* Update the inputA and inputB pointers for next MAC calculation */
|
||||
px = pIn1 + count;
|
||||
py = pSrc2;
|
||||
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
|
||||
/* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize2 % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Accumulator is made zero for every iteration */
|
||||
sum = 0.0f;
|
||||
|
||||
/* Apply loop unrolling and compute 4 MACs simultaneously. */
|
||||
k = srcBLen >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
|
||||
** a second loop below computes MACs for the remaining 1 to 3 samples. */
|
||||
while(k > 0u)
|
||||
{
|
||||
/* Perform the multiply-accumulates */
|
||||
sum += *px++ * *py--;
|
||||
sum += *px++ * *py--;
|
||||
sum += *px++ * *py--;
|
||||
sum += *px++ * *py--;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
k--;
|
||||
}
|
||||
|
||||
/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
|
||||
** No loop unrolling is used. */
|
||||
k = srcBLen % 0x4u;
|
||||
|
||||
while(k > 0u)
|
||||
{
|
||||
/* Perform the multiply-accumulate */
|
||||
sum += *px++ * *py--;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
k--;
|
||||
}
|
||||
|
||||
/* Store the result in the accumulator in the destination buffer. */
|
||||
*pOut++ = sum;
|
||||
|
||||
/* Increment the MAC count */
|
||||
count++;
|
||||
|
||||
/* Update the inputA and inputB pointers for next MAC calculation */
|
||||
px = pIn1 + count;
|
||||
py = pSrc2;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* If the srcBLen is not a multiple of 4,
|
||||
* the blockSize2 loop cannot be unrolled by 4 */
|
||||
blkCnt = blockSize2;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Accumulator is made zero for every iteration */
|
||||
sum = 0.0f;
|
||||
|
||||
/* srcBLen number of MACS should be performed */
|
||||
k = srcBLen;
|
||||
|
||||
while(k > 0u)
|
||||
{
|
||||
/* Perform the multiply-accumulate */
|
||||
sum += *px++ * *py--;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
k--;
|
||||
}
|
||||
|
||||
/* Store the result in the accumulator in the destination buffer. */
|
||||
*pOut++ = sum;
|
||||
|
||||
/* Increment the MAC count */
|
||||
count++;
|
||||
|
||||
/* Update the inputA and inputB pointers for next MAC calculation */
|
||||
px = pIn1 + count;
|
||||
py = pSrc2;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* --------------------------
|
||||
* Initializations of stage3
|
||||
* -------------------------*/
|
||||
|
||||
/* sum += x[srcALen-srcBLen+1] * y[srcBLen-1] + x[srcALen-srcBLen+2] * y[srcBLen-2] +...+ x[srcALen-1] * y[1]
|
||||
* sum += x[srcALen-srcBLen+2] * y[srcBLen-1] + x[srcALen-srcBLen+3] * y[srcBLen-2] +...+ x[srcALen-1] * y[2]
|
||||
* ....
|
||||
* sum += x[srcALen-2] * y[srcBLen-1] + x[srcALen-1] * y[srcBLen-2]
|
||||
* sum += x[srcALen-1] * y[srcBLen-1]
|
||||
*/
|
||||
|
||||
/* In this stage the MAC operations are decreased by 1 for every iteration.
|
||||
The blockSize3 variable holds the number of MAC operations performed */
|
||||
|
||||
/* Working pointer of inputA */
|
||||
pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
|
||||
px = pSrc1;
|
||||
|
||||
/* Working pointer of inputB */
|
||||
pSrc2 = pIn2 + (srcBLen - 1u);
|
||||
py = pSrc2;
|
||||
|
||||
/* -------------------
|
||||
* Stage3 process
|
||||
* ------------------*/
|
||||
|
||||
while(blockSize3 > 0u)
|
||||
{
|
||||
/* Accumulator is made zero for every iteration */
|
||||
sum = 0.0f;
|
||||
|
||||
/* Apply loop unrolling and compute 4 MACs simultaneously. */
|
||||
k = blockSize3 >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
|
||||
** a second loop below computes MACs for the remaining 1 to 3 samples. */
|
||||
while(k > 0u)
|
||||
{
|
||||
/* sum += x[srcALen - srcBLen + 1] * y[srcBLen - 1] */
|
||||
sum += *px++ * *py--;
|
||||
|
||||
/* sum += x[srcALen - srcBLen + 2] * y[srcBLen - 2] */
|
||||
sum += *px++ * *py--;
|
||||
|
||||
/* sum += x[srcALen - srcBLen + 3] * y[srcBLen - 3] */
|
||||
sum += *px++ * *py--;
|
||||
|
||||
/* sum += x[srcALen - srcBLen + 4] * y[srcBLen - 4] */
|
||||
sum += *px++ * *py--;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
k--;
|
||||
}
|
||||
|
||||
/* If the blockSize3 is not a multiple of 4, compute any remaining MACs here.
|
||||
** No loop unrolling is used. */
|
||||
k = blockSize3 % 0x4u;
|
||||
|
||||
while(k > 0u)
|
||||
{
|
||||
/* Perform the multiply-accumulates */
|
||||
/* sum += x[srcALen-1] * y[srcBLen-1] */
|
||||
sum += *px++ * *py--;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
k--;
|
||||
}
|
||||
|
||||
/* Store the result in the accumulator in the destination buffer. */
|
||||
*pOut++ = sum;
|
||||
|
||||
/* Update the inputA and inputB pointers for next MAC calculation */
|
||||
px = ++pSrc1;
|
||||
py = pSrc2;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blockSize3--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
float32_t *pIn1 = pSrcA; /* inputA pointer */
|
||||
float32_t *pIn2 = pSrcB; /* inputB pointer */
|
||||
float32_t sum; /* Accumulator */
|
||||
uint32_t i, j; /* loop counters */
|
||||
|
||||
/* Loop to calculate convolution for output length number of times */
|
||||
for (i = 0u; i < ((srcALen + srcBLen) - 1u); i++)
|
||||
{
|
||||
/* Initialize sum with zero to carry out MAC operations */
|
||||
sum = 0.0f;
|
||||
|
||||
/* Loop to perform MAC operations according to convolution equation */
|
||||
for (j = 0u; j <= i; j++)
|
||||
{
|
||||
/* Check the array limitations */
|
||||
if((((i - j) < srcBLen) && (j < srcALen)))
|
||||
{
|
||||
/* z[i] += x[i-j] * y[j] */
|
||||
sum += pIn1[j] * pIn2[i - j];
|
||||
}
|
||||
}
|
||||
/* Store the output in the destination buffer */
|
||||
pDst[i] = sum;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of Conv group
|
||||
*/
|
|
@ -1,538 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. February 2012
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_conv_fast_opt_q15.c
|
||||
*
|
||||
* Description: Fast Q15 Convolution.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3
|
||||
*
|
||||
* Version 1.1.0 2012/02/15
|
||||
* Updated with more optimizations, bug fixes and minor API changes.
|
||||
*
|
||||
* Version 1.0.11 2011/10/18
|
||||
* Bug Fix in conv, correlation, partial convolution.
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupFilters
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup Conv
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
|
||||
* @param[in] *pSrcA points to the first input sequence.
|
||||
* @param[in] srcALen length of the first input sequence.
|
||||
* @param[in] *pSrcB points to the second input sequence.
|
||||
* @param[in] srcBLen length of the second input sequence.
|
||||
* @param[out] *pDst points to the location where the output result is written. Length srcALen+srcBLen-1.
|
||||
* @param[in] *pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
|
||||
* @param[in] *pScratch2 points to scratch buffer of size min(srcALen, srcBLen).
|
||||
* @return none.
|
||||
*
|
||||
* \par Restrictions
|
||||
* If the silicon does not support unaligned memory access enable the macro UNALIGNED_SUPPORT_DISABLE
|
||||
* In this case input, output, scratch1 and scratch2 buffers should be aligned by 32-bit
|
||||
*
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
*
|
||||
* \par
|
||||
* This fast version uses a 32-bit accumulator with 2.30 format.
|
||||
* The accumulator maintains full precision of the intermediate multiplication results
|
||||
* but provides only a single guard bit. There is no saturation on intermediate additions.
|
||||
* Thus, if the accumulator overflows it wraps around and distorts the result.
|
||||
* The input signals should be scaled down to avoid intermediate overflows.
|
||||
* Scale down the inputs by log2(min(srcALen, srcBLen)) (log2 is read as log to the base 2) times to avoid overflows,
|
||||
* as maximum of min(srcALen, srcBLen) number of additions are carried internally.
|
||||
* The 2.30 accumulator is right shifted by 15 bits and then saturated to 1.15 format to yield the final result.
|
||||
*
|
||||
* \par
|
||||
* See <code>arm_conv_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion.
|
||||
*/
|
||||
|
||||
void arm_conv_fast_opt_q15(
|
||||
q15_t * pSrcA,
|
||||
uint32_t srcALen,
|
||||
q15_t * pSrcB,
|
||||
uint32_t srcBLen,
|
||||
q15_t * pDst,
|
||||
q15_t * pScratch1,
|
||||
q15_t * pScratch2)
|
||||
{
|
||||
q31_t acc0, acc1, acc2, acc3; /* Accumulators */
|
||||
q31_t x1, x2, x3; /* Temporary variables to hold state and coefficient values */
|
||||
q31_t y1, y2; /* State variables */
|
||||
q15_t *pOut = pDst; /* output pointer */
|
||||
q15_t *pScr1 = pScratch1; /* Temporary pointer for scratch1 */
|
||||
q15_t *pScr2 = pScratch2; /* Temporary pointer for scratch1 */
|
||||
q15_t *pIn1; /* inputA pointer */
|
||||
q15_t *pIn2; /* inputB pointer */
|
||||
q15_t *px; /* Intermediate inputA pointer */
|
||||
q15_t *py; /* Intermediate inputB pointer */
|
||||
uint32_t j, k, blkCnt; /* loop counter */
|
||||
uint32_t tapCnt; /* loop count */
|
||||
#ifdef UNALIGNED_SUPPORT_DISABLE
|
||||
|
||||
q15_t a, b;
|
||||
|
||||
#endif /* #ifdef UNALIGNED_SUPPORT_DISABLE */
|
||||
|
||||
/* The algorithm implementation is based on the lengths of the inputs. */
|
||||
/* srcB is always made to slide across srcA. */
|
||||
/* So srcBLen is always considered as shorter or equal to srcALen */
|
||||
if(srcALen >= srcBLen)
|
||||
{
|
||||
/* Initialization of inputA pointer */
|
||||
pIn1 = pSrcA;
|
||||
|
||||
/* Initialization of inputB pointer */
|
||||
pIn2 = pSrcB;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Initialization of inputA pointer */
|
||||
pIn1 = pSrcB;
|
||||
|
||||
/* Initialization of inputB pointer */
|
||||
pIn2 = pSrcA;
|
||||
|
||||
/* srcBLen is always considered as shorter or equal to srcALen */
|
||||
j = srcBLen;
|
||||
srcBLen = srcALen;
|
||||
srcALen = j;
|
||||
}
|
||||
|
||||
/* Pointer to take end of scratch2 buffer */
|
||||
pScr2 = pScratch2 + srcBLen - 1;
|
||||
|
||||
/* points to smaller length sequence */
|
||||
px = pIn2;
|
||||
|
||||
/* Apply loop unrolling and do 4 Copies simultaneously. */
|
||||
k = srcBLen >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling copies 4 data points at a time.
|
||||
** a second loop below copies for the remaining 1 to 3 samples. */
|
||||
|
||||
/* Copy smaller length input sequence in reverse order into second scratch buffer */
|
||||
while(k > 0u)
|
||||
{
|
||||
/* copy second buffer in reversal manner */
|
||||
*pScr2-- = *px++;
|
||||
*pScr2-- = *px++;
|
||||
*pScr2-- = *px++;
|
||||
*pScr2-- = *px++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
k--;
|
||||
}
|
||||
|
||||
/* If the count is not a multiple of 4, copy remaining samples here.
|
||||
** No loop unrolling is used. */
|
||||
k = srcBLen % 0x4u;
|
||||
|
||||
while(k > 0u)
|
||||
{
|
||||
/* copy second buffer in reversal manner for remaining samples */
|
||||
*pScr2-- = *px++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
k--;
|
||||
}
|
||||
|
||||
/* Initialze temporary scratch pointer */
|
||||
pScr1 = pScratch1;
|
||||
|
||||
/* Assuming scratch1 buffer is aligned by 32-bit */
|
||||
/* Fill (srcBLen - 1u) zeros in scratch1 buffer */
|
||||
arm_fill_q15(0, pScr1, (srcBLen - 1u));
|
||||
|
||||
/* Update temporary scratch pointer */
|
||||
pScr1 += (srcBLen - 1u);
|
||||
|
||||
/* Copy bigger length sequence(srcALen) samples in scratch1 buffer */
|
||||
|
||||
#ifndef UNALIGNED_SUPPORT_DISABLE
|
||||
|
||||
/* Copy (srcALen) samples in scratch buffer */
|
||||
arm_copy_q15(pIn1, pScr1, srcALen);
|
||||
|
||||
/* Update pointers */
|
||||
pScr1 += srcALen;
|
||||
|
||||
#else
|
||||
|
||||
/* Apply loop unrolling and do 4 Copies simultaneously. */
|
||||
k = srcALen >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling copies 4 data points at a time.
|
||||
** a second loop below copies for the remaining 1 to 3 samples. */
|
||||
while(k > 0u)
|
||||
{
|
||||
/* copy second buffer in reversal manner */
|
||||
*pScr1++ = *pIn1++;
|
||||
*pScr1++ = *pIn1++;
|
||||
*pScr1++ = *pIn1++;
|
||||
*pScr1++ = *pIn1++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
k--;
|
||||
}
|
||||
|
||||
/* If the count is not a multiple of 4, copy remaining samples here.
|
||||
** No loop unrolling is used. */
|
||||
k = srcALen % 0x4u;
|
||||
|
||||
while(k > 0u)
|
||||
{
|
||||
/* copy second buffer in reversal manner for remaining samples */
|
||||
*pScr1++ = *pIn1++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
k--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
|
||||
|
||||
|
||||
#ifndef UNALIGNED_SUPPORT_DISABLE
|
||||
|
||||
/* Fill (srcBLen - 1u) zeros at end of scratch buffer */
|
||||
arm_fill_q15(0, pScr1, (srcBLen - 1u));
|
||||
|
||||
/* Update pointer */
|
||||
pScr1 += (srcBLen - 1u);
|
||||
|
||||
#else
|
||||
|
||||
/* Apply loop unrolling and do 4 Copies simultaneously. */
|
||||
k = (srcBLen - 1u) >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling copies 4 data points at a time.
|
||||
** a second loop below copies for the remaining 1 to 3 samples. */
|
||||
while(k > 0u)
|
||||
{
|
||||
/* copy second buffer in reversal manner */
|
||||
*pScr1++ = 0;
|
||||
*pScr1++ = 0;
|
||||
*pScr1++ = 0;
|
||||
*pScr1++ = 0;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
k--;
|
||||
}
|
||||
|
||||
/* If the count is not a multiple of 4, copy remaining samples here.
|
||||
** No loop unrolling is used. */
|
||||
k = (srcBLen - 1u) % 0x4u;
|
||||
|
||||
while(k > 0u)
|
||||
{
|
||||
/* copy second buffer in reversal manner for remaining samples */
|
||||
*pScr1++ = 0;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
k--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
|
||||
|
||||
/* Temporary pointer for scratch2 */
|
||||
py = pScratch2;
|
||||
|
||||
|
||||
/* Initialization of pIn2 pointer */
|
||||
pIn2 = py;
|
||||
|
||||
/* First part of the processing with loop unrolling process 4 data points at a time.
|
||||
** a second loop below process for the remaining 1 to 3 samples. */
|
||||
|
||||
/* Actual convolution process starts here */
|
||||
blkCnt = (srcALen + srcBLen - 1u) >> 2;
|
||||
|
||||
while(blkCnt > 0)
|
||||
{
|
||||
/* Initialze temporary scratch pointer as scratch1 */
|
||||
pScr1 = pScratch1;
|
||||
|
||||
/* Clear Accumlators */
|
||||
acc0 = 0;
|
||||
acc1 = 0;
|
||||
acc2 = 0;
|
||||
acc3 = 0;
|
||||
|
||||
/* Read two samples from scratch1 buffer */
|
||||
x1 = *__SIMD32(pScr1)++;
|
||||
|
||||
/* Read next two samples from scratch1 buffer */
|
||||
x2 = *__SIMD32(pScr1)++;
|
||||
|
||||
tapCnt = (srcBLen) >> 2u;
|
||||
|
||||
while(tapCnt > 0u)
|
||||
{
|
||||
|
||||
#ifndef UNALIGNED_SUPPORT_DISABLE
|
||||
|
||||
/* Read four samples from smaller buffer */
|
||||
y1 = _SIMD32_OFFSET(pIn2);
|
||||
y2 = _SIMD32_OFFSET(pIn2 + 2u);
|
||||
|
||||
/* multiply and accumlate */
|
||||
acc0 = __SMLAD(x1, y1, acc0);
|
||||
acc2 = __SMLAD(x2, y1, acc2);
|
||||
|
||||
/* pack input data */
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
x3 = __PKHBT(x2, x1, 0);
|
||||
#else
|
||||
x3 = __PKHBT(x1, x2, 0);
|
||||
#endif
|
||||
|
||||
/* multiply and accumlate */
|
||||
acc1 = __SMLADX(x3, y1, acc1);
|
||||
|
||||
/* Read next two samples from scratch1 buffer */
|
||||
x1 = _SIMD32_OFFSET(pScr1);
|
||||
|
||||
/* multiply and accumlate */
|
||||
acc0 = __SMLAD(x2, y2, acc0);
|
||||
acc2 = __SMLAD(x1, y2, acc2);
|
||||
|
||||
/* pack input data */
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
x3 = __PKHBT(x1, x2, 0);
|
||||
#else
|
||||
x3 = __PKHBT(x2, x1, 0);
|
||||
#endif
|
||||
|
||||
acc3 = __SMLADX(x3, y1, acc3);
|
||||
acc1 = __SMLADX(x3, y2, acc1);
|
||||
|
||||
x2 = _SIMD32_OFFSET(pScr1 + 2u);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
x3 = __PKHBT(x2, x1, 0);
|
||||
#else
|
||||
x3 = __PKHBT(x1, x2, 0);
|
||||
#endif
|
||||
|
||||
acc3 = __SMLADX(x3, y2, acc3);
|
||||
|
||||
#else
|
||||
|
||||
/* Read four samples from smaller buffer */
|
||||
a = *pIn2;
|
||||
b = *(pIn2 + 1);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
y1 = __PKHBT(a, b, 16);
|
||||
#else
|
||||
y1 = __PKHBT(b, a, 16);
|
||||
#endif
|
||||
|
||||
a = *(pIn2 + 2);
|
||||
b = *(pIn2 + 3);
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
y2 = __PKHBT(a, b, 16);
|
||||
#else
|
||||
y2 = __PKHBT(b, a, 16);
|
||||
#endif
|
||||
|
||||
acc0 = __SMLAD(x1, y1, acc0);
|
||||
|
||||
acc2 = __SMLAD(x2, y1, acc2);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
x3 = __PKHBT(x2, x1, 0);
|
||||
#else
|
||||
x3 = __PKHBT(x1, x2, 0);
|
||||
#endif
|
||||
|
||||
acc1 = __SMLADX(x3, y1, acc1);
|
||||
|
||||
a = *pScr1;
|
||||
b = *(pScr1 + 1);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
x1 = __PKHBT(a, b, 16);
|
||||
#else
|
||||
x1 = __PKHBT(b, a, 16);
|
||||
#endif
|
||||
|
||||
acc0 = __SMLAD(x2, y2, acc0);
|
||||
|
||||
acc2 = __SMLAD(x1, y2, acc2);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
x3 = __PKHBT(x1, x2, 0);
|
||||
#else
|
||||
x3 = __PKHBT(x2, x1, 0);
|
||||
#endif
|
||||
|
||||
acc3 = __SMLADX(x3, y1, acc3);
|
||||
|
||||
acc1 = __SMLADX(x3, y2, acc1);
|
||||
|
||||
a = *(pScr1 + 2);
|
||||
b = *(pScr1 + 3);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
x2 = __PKHBT(a, b, 16);
|
||||
#else
|
||||
x2 = __PKHBT(b, a, 16);
|
||||
#endif
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
x3 = __PKHBT(x2, x1, 0);
|
||||
#else
|
||||
x3 = __PKHBT(x1, x2, 0);
|
||||
#endif
|
||||
|
||||
acc3 = __SMLADX(x3, y2, acc3);
|
||||
|
||||
#endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
|
||||
|
||||
/* update scratch pointers */
|
||||
pIn2 += 4u;
|
||||
pScr1 += 4u;
|
||||
|
||||
|
||||
/* Decrement the loop counter */
|
||||
tapCnt--;
|
||||
}
|
||||
|
||||
/* Update scratch pointer for remaining samples of smaller length sequence */
|
||||
pScr1 -= 4u;
|
||||
|
||||
/* apply same above for remaining samples of smaller length sequence */
|
||||
tapCnt = (srcBLen) & 3u;
|
||||
|
||||
while(tapCnt > 0u)
|
||||
{
|
||||
|
||||
/* accumlate the results */
|
||||
acc0 += (*pScr1++ * *pIn2);
|
||||
acc1 += (*pScr1++ * *pIn2);
|
||||
acc2 += (*pScr1++ * *pIn2);
|
||||
acc3 += (*pScr1++ * *pIn2++);
|
||||
|
||||
pScr1 -= 3u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
tapCnt--;
|
||||
}
|
||||
|
||||
blkCnt--;
|
||||
|
||||
|
||||
/* Store the results in the accumulators in the destination buffer. */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*__SIMD32(pOut)++ =
|
||||
__PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
|
||||
|
||||
*__SIMD32(pOut)++ =
|
||||
__PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
|
||||
|
||||
|
||||
#else
|
||||
|
||||
*__SIMD32(pOut)++ =
|
||||
__PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
|
||||
|
||||
*__SIMD32(pOut)++ =
|
||||
__PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
|
||||
|
||||
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* Initialization of inputB pointer */
|
||||
pIn2 = py;
|
||||
|
||||
pScratch1 += 4u;
|
||||
|
||||
}
|
||||
|
||||
|
||||
blkCnt = (srcALen + srcBLen - 1u) & 0x3;
|
||||
|
||||
/* Calculate convolution for remaining samples of Bigger length sequence */
|
||||
while(blkCnt > 0)
|
||||
{
|
||||
/* Initialze temporary scratch pointer as scratch1 */
|
||||
pScr1 = pScratch1;
|
||||
|
||||
/* Clear Accumlators */
|
||||
acc0 = 0;
|
||||
|
||||
tapCnt = (srcBLen) >> 1u;
|
||||
|
||||
while(tapCnt > 0u)
|
||||
{
|
||||
|
||||
acc0 += (*pScr1++ * *pIn2++);
|
||||
acc0 += (*pScr1++ * *pIn2++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
tapCnt--;
|
||||
}
|
||||
|
||||
tapCnt = (srcBLen) & 1u;
|
||||
|
||||
/* apply same above for remaining samples of smaller length sequence */
|
||||
while(tapCnt > 0u)
|
||||
{
|
||||
|
||||
/* accumlate the results */
|
||||
acc0 += (*pScr1++ * *pIn2++);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
tapCnt--;
|
||||
}
|
||||
|
||||
blkCnt--;
|
||||
|
||||
/* The result is in 2.30 format. Convert to 1.15 with saturation.
|
||||
** Then store the output in the destination buffer. */
|
||||
*pOut++ = (q15_t) (__SSAT((acc0 >> 15), 16));
|
||||
|
||||
/* Initialization of inputB pointer */
|
||||
pIn2 = py;
|
||||
|
||||
pScratch1 += 1u;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of Conv group
|
||||
*/
|
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue