在前面帖子实现ADC DMA采集、DSP库添加的基础上,本帖将采集1024点数据进行FFT运算,评估STM32H7S78的DSP性能。
测试代码如下:
/* USER CODE END Header */
/* Includes ------------------------------------------------------------------*/
#include "main.h"
/* Private includes ----------------------------------------------------------*/
/* USER CODE BEGIN Includes */
#include "stdio.h"
#include "arm_math.h"
#include "perf_counter.h"
/* USER CODE END Includes */
/* Private typedef -----------------------------------------------------------*/
/* USER CODE BEGIN PTD */
/* USER CODE END PTD */
/* Private define ------------------------------------------------------------*/
/* USER CODE BEGIN PD */
/* Analog supply/reference voltage in millivolts; passed to __LL_ADC_CALC_DATA_TO_VOLTAGE in main(). */
#define VDDA_APPLI (3300UL)
/* Value used to pre-fill the raw sample buffer before the first conversion:
   one above the 12-bit digital scale (presumably so it cannot be mistaken
   for a real conversion result -- confirm against the LL ADC macro). */
#define VAR_CONVERTED_DATA_INIT_VALUE (__LL_ADC_DIGITAL_SCALE(LL_ADC_RESOLUTION_12B) + 1)
/* Number of ADC samples per acquisition; also reused as FFT_LENGTH. */
#define ADC_CONVERTED_DATA_BUFFER_SIZE 1024
/* Raw ADC conversion results; handed to HAL_ADC_Start_DMA() as the destination buffer.
   Declared with 32-byte alignment. */
ALIGN_32BYTES (uint16_t uhADCxConvertedData[ADC_CONVERTED_DATA_BUFFER_SIZE]);
/* Conversion results scaled to millivolts; filled by main() and read by the FFT routine. */
uint16_t uhADCxConvertedData_Voltage_mVolt[ADC_CONVERTED_DATA_BUFFER_SIZE];
/* Acquisition state flag shared between main() and the ADC callback:
   0: DMA transfer is not completed
   1: DMA transfer is completed
   2: DMA transfer has not been started yet (initial state) */
__IO uint8_t ubDmaTransferStatus = 2U;
/* USER CODE END PD */
/* Private macro -------------------------------------------------------------*/
/* USER CODE BEGIN PM */
/* USER CODE END PM */
/* Private variables ---------------------------------------------------------*/
/* HAL handle for ADC2; used in main() for calibration and for starting DMA conversions. */
ADC_HandleTypeDef hadc2;
/* HAL handle for GPDMA1 channel 0. Not referenced directly in the visible user code;
   presumably linked to the ADC in the init/MSP code -- confirm. */
DMA_HandleTypeDef handle_GPDMA1_Channel0;
/* HAL handle for TIM6. Not referenced directly in the visible user code;
   presumably the ADC trigger source -- confirm against MX_TIM6_Init(). */
TIM_HandleTypeDef htim6;
/* HAL handle for UART4; __io_putchar() sends printf output through it. */
UART_HandleTypeDef huart4;
/* USER CODE BEGIN PV */
/* USER CODE END PV */
/* Private function prototypes -----------------------------------------------*/
/* Forward declarations of the initialization routines that main() calls
   before entering its loop. Their definitions are outside this excerpt. */
static void MPU_Config(void);
static void MX_GPIO_Init(void);
static void MX_GPDMA1_Init(void);
static void MX_TIM6_Init(void);
static void MX_UART4_Init(void);
static void MX_ADC2_Init(void);
/* USER CODE BEGIN PFP */
/* USER CODE END PFP */
/* Private user code ---------------------------------------------------------*/
/* USER CODE BEGIN 0 */
/* USER CODE BEGIN PFP */
int __io_putchar(int ch)
{
HAL_UART_Transmit(&huart4 , (uint8_t *)&ch, 1, 0xFFFF);
return ch;
}
/* USER CODE END PFP */
/**
  * @brief  ADC conversion-complete hook: marks the acquisition as finished
  *         so the main loop can process the sample buffer.
  * @param  hadc ADC handle associated with the event (not inspected here).
  */
void HAL_ADC_ConvCpltCallback(ADC_HandleTypeDef *hadc)
{
  (void)hadc;
  /* 1 == "DMA transfer is completed" (see ubDmaTransferStatus). */
  ubDmaTransferStatus = 1U;
}
/* Loop index shared by the buffer loops in main(). */
int tmp_index;
/* FFT length tied to the ADC acquisition size. */
#define FFT_LENGTH ADC_CONVERTED_DATA_BUFFER_SIZE
/* NOTE(review): fft_inputbuf, fft_outputbuf and fftSize are not referenced
   anywhere in the visible code -- confirm whether they are still needed. */
float fft_inputbuf[FFT_LENGTH * 2];
float fft_outputbuf[FFT_LENGTH];
/* Transform direction handed to arm_rfft_fast_f32: 0 = forward, 1 = inverse. */
uint32_t ifftFlag = 0;
uint32_t fftSize = 0;
/* Number of real samples fed to the FFT. */
#define TEST_LENGTH_SAMPLES 1024
/* Working buffers for arm_rfft_f32_app2(); each holds 2 * TEST_LENGTH_SAMPLES floats. */
/* FFT output (interleaved real/imaginary values). */
static float32_t testOutput_f32[TEST_LENGTH_SAMPLES*2];
/* Magnitude of each output bin. */
static float32_t testOutputMag_f32[TEST_LENGTH_SAMPLES*2];
/* FFT input samples. */
static float32_t testInput_f32[TEST_LENGTH_SAMPLES*2];
/* Phase of each output bin, in degrees. */
static float32_t Phase_f32[TEST_LENGTH_SAMPLES*2];
/**
  * @brief  Compute the phase, in degrees, of each bin of an interleaved
  *         complex spectrum, zeroing bins whose magnitude is below a threshold.
  * @param  _ptr         Interleaved complex input (re0, im0, re1, im1, ...);
  *                      2 * _usFFTPoints floats are read.
  * @param  _phase       Output array of _usFFTPoints floats. Receives the phase
  *                      in degrees, or 0 where the bin magnitude is smaller
  *                      than _uiCmpValue.
  * @param  _usFFTPoints Number of complex bins to process.
  * @param  _uiCmpValue  Magnitude threshold below which the phase is reported as 0.
  * @note   Results are written through _phase. Previously the parameter was
  *         ignored and the global Phase_f32 was written unconditionally.
  */
void PowerPhaseRadians_f32(float32_t *_ptr, float32_t *_phase, uint16_t _usFFTPoints, float32_t _uiCmpValue)
{
  if ((_ptr == NULL) || (_phase == NULL))
  {
    return;
  }

  for (uint16_t i = 0; i < _usFFTPoints; i++)
  {
    const float32_t re = _ptr[2 * i];      /* real part */
    const float32_t im = _ptr[2 * i + 1];  /* imaginary part */
    float32_t mag;

    /* Bin magnitude. */
    arm_sqrt_f32(re * re + im * im, &mag);

    if (_uiCmpValue > mag)
    {
      /* Too small to carry a meaningful phase. */
      _phase[i] = 0;
    }
    else
    {
      /* atan2f yields radians in (-pi, pi]; convert to degrees. */
      _phase[i] = atan2f(im, re) * 180.0f / 3.1415926f;
    }
  }
}
static void arm_rfft_f32_app2(void)
{
uint16_t i;
arm_rfft_fast_instance_f32 S;
//正变换
ifftFlag = 0;
//初始化结构体S中的参数
arm_rfft_fast_init_f32(&S, TEST_LENGTH_SAMPLES);
for(i=0; i<TEST_LENGTH_SAMPLES; i++)
{
//测试波形
//testInput_f32[i] = 1 + cos(2*3.1415926f*50*i/1024 + 3.1415926f/3);
testInput_f32[i * 2] = uhADCxConvertedData_Voltage_mVolt[i];
testInput_f32[i * 2 + 1] = 0;//虚部赋值,固定为0.
}
//1024点实序列快速变换
arm_rfft_fast_f32(&S, testInput_f32, testOutput_f32, ifftFlag);
arm_cmplx_mag_f32(testOutput_f32, testOutputMag_f32, TEST_LENGTH_SAMPLES);
PowerPhaseRadians_f32(testOutput_f32, Phase_f32, TEST_LENGTH_SAMPLES, 0.5f);
//串口打印求解的幅频和相频
for(i=0; i<TEST_LENGTH_SAMPLES; i++)
{
//printf("%f, %f\r\n", testOutputMag_f32[i], Phase_f32[i]);
}
}
/* USER CODE END 0 */
/**
  * @brief  The application entry point.
  *
  * Initializes the MPU, HAL, cycle counter and peripherals, calibrates ADC2,
  * then loops forever: start a DMA acquisition, wait 50 ms, and if the
  * acquisition completed, convert the samples to millivolts, run the FFT
  * once and print how many CPU cycles the FFT processing took.
  * @retval int
  */
int main(void)
{
  /* USER CODE BEGIN 1 */
  /* USER CODE END 1 */
  /* MPU Configuration--------------------------------------------------------*/
  MPU_Config();
  /* MCU Configuration--------------------------------------------------------*/
  /* Update SystemCoreClock variable according to RCC registers values. */
  SystemCoreClockUpdate();
  /* Reset of all peripherals, Initializes the Flash interface and the Systick. */
  HAL_Init();
  /* USER CODE BEGIN Init */
  init_cycle_counter(true);
  /* USER CODE END Init */
  /* USER CODE BEGIN SysInit */
  /* USER CODE END SysInit */
  /* Initialize all configured peripherals */
  MX_GPIO_Init();
  MX_GPDMA1_Init();
  MX_TIM6_Init();
  MX_UART4_Init();
  MX_ADC2_Init();
  /* USER CODE BEGIN 2 */
  printf("ADC DMA DEMO\n");

  /* Pre-fill the raw buffer with the init marker value. */
  for (tmp_index = 0; tmp_index < ADC_CONVERTED_DATA_BUFFER_SIZE; tmp_index++)
  {
    uhADCxConvertedData[tmp_index] = VAR_CONVERTED_DATA_INIT_VALUE;
  }

  if (HAL_ADCEx_Calibration_Start(&hadc2, ADC_SINGLE_ENDED) != HAL_OK)
  {
    printf("ADC Calibration Err\n");
    Error_Handler();
  }
  printf("ADC Calibration End\n");
  /* USER CODE END 2 */
  /* Infinite loop */
  /* USER CODE BEGIN WHILE */
  while (1)
  {
    /* USER CODE END WHILE */
    /* USER CODE BEGIN 3 */
    HAL_GPIO_TogglePin(LD4_GPIO_Port, LD4_Pin);

    if (HAL_ADC_Start_DMA(&hadc2, (uint32_t *)uhADCxConvertedData, ADC_CONVERTED_DATA_BUFFER_SIZE) != HAL_OK)
    {
      Error_Handler();
    }

    /* Give the acquisition time to finish before checking the flag. */
    HAL_Delay(50);

    if (ubDmaTransferStatus == 1)
    {
      ubDmaTransferStatus = 0;

      /* NOTE(review): if the data cache is enabled for the memory holding
         uhADCxConvertedData, it must be invalidated before the CPU reads the
         DMA-written samples -- confirm against MPU_Config(). */

      /* Convert the whole buffer to millivolts first ... */
      for (tmp_index = 0; tmp_index < ADC_CONVERTED_DATA_BUFFER_SIZE; tmp_index++)
      {
        uhADCxConvertedData_Voltage_mVolt[tmp_index] =
            (uint16_t)__LL_ADC_CALC_DATA_TO_VOLTAGE(VDDA_APPLI, uhADCxConvertedData[tmp_index], LL_ADC_RESOLUTION_12B);
      }

      /* ... then run the FFT exactly once on the complete data set, with the
         cycle counter bracketing the call so the printed value reflects the
         FFT processing (instance init, input copy, FFT, magnitude and phase)
         rather than an empty measurement. */
      start_cycle_counter();
      arm_rfft_f32_app2();
      int64_t lCycleUsed = stop_cycle_counter();

      printf("cycle counter = %lld\n", (long long)lCycleUsed);
    }
  }
  /* USER CODE END 3 */
}
主要用到的函数是arm_rfft_fast_f32,函数原型是
void arm_rfft_fast_f32(
const arm_rfft_fast_instance_f32 * S,
float32_t * p,
float32_t * pOut,
uint8_t ifftFlag)
这个函数用于单精度浮点实数FFT,函数有4个参数:
S是FFT实例化句柄,调用函数arm_rfft_fast_init_f32初始化得到,然后供此函数arm_rfft_fast_f32调用。支持32, 64, 128, 256, 512, 1024, 2048, 4096点FFT。
比如做1024点FFT,代码如下:
arm_rfft_fast_instance_f32 S;
arm_rfft_fast_init_f32(&S, 1024);
arm_rfft_fast_f32(&S, testInput_f32, testOutput_f32, ifftFlag);
p是实数输入缓冲区的地址,比如我们要做1024点实数FFT,要保证缓冲区中有1024个float32_t实数采样点(只存放实数序列,不需要交替填充虚部)。
pOut是FFT转换结果,转换结果不是实数了,而是复数,按照实部,虚部,实部,虚部,依次排列。比如做1024点FFT,这里的输出也会有1024个数据,即512个复数。
ifftFlag用于设置正变换和逆变换,ifftFlag=0表示正变换,ifftFlag=1表示逆变换。
测试结果:
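调用arm_rfft_fast_f32计算1024点FFT耗时184个时钟周期,主频配置的是600MHz,即184/600MHz≈0.307us,下图是大佬测试的STM32F4和F1的数据,可见1024点FFT性能可以说是F4的369.25/0.307≈1203倍。需要注意:184个周期远小于1024点FFT本身的运算量(仅蝶形运算就有数千次),这个数值更可能只是计时函数自身的开销;下结论之前应先确认start_cycle_counter()与stop_cycle_counter()之间确实包含了FFT调用。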