[经验] 【STM32H7S78-DK】测评+ADC DMA采集1024点FFT计算速度测评

dql2016   2024-11-11 21:18 楼主

在前面帖子实现ADC DMA采集、DSP库添加的基础上,本帖将采集1024点数据进行FFT运算,评估STM32H7S78的DSP性能。

测试代码如下:

/* USER CODE END Header */
/* Includes ------------------------------------------------------------------*/
#include "main.h"

/* Private includes ----------------------------------------------------------*/
/* USER CODE BEGIN Includes */

#include "stdio.h"
#include "arm_math.h"
#include "perf_counter.h"

/* USER CODE END Includes */

/* Private typedef -----------------------------------------------------------*/
/* USER CODE BEGIN PTD */

/* USER CODE END PTD */

/* Private define ------------------------------------------------------------*/
/* USER CODE BEGIN PD */

/* Analog reference value passed to __LL_ADC_CALC_DATA_TO_VOLTAGE() in main().
 * The results are stored in the *_mVolt buffer, so this is presumably in
 * millivolts (3.3 V) -- confirm against the board's VDDA. */
#define VDDA_APPLI                       (3300UL)
/* One more than the 12-bit digital scale returned by __LL_ADC_DIGITAL_SCALE();
 * main() pre-fills the raw sample buffer with this value before calibration. */
#define VAR_CONVERTED_DATA_INIT_VALUE    (__LL_ADC_DIGITAL_SCALE(LL_ADC_RESOLUTION_12B) + 1)
/* Number of ADC samples per DMA acquisition; also used as FFT_LENGTH. */
#define ADC_CONVERTED_DATA_BUFFER_SIZE   1024
/* Raw ADC samples; this is the destination buffer handed to HAL_ADC_Start_DMA().
 * Declared through ALIGN_32BYTES (presumably for 32-byte alignment -- confirm). */
ALIGN_32BYTES (uint16_t uhADCxConvertedData[ADC_CONVERTED_DATA_BUFFER_SIZE]);
/* Samples converted with __LL_ADC_CALC_DATA_TO_VOLTAGE(); read by the FFT routine. */
uint16_t uhADCxConvertedData_Voltage_mVolt[ADC_CONVERTED_DATA_BUFFER_SIZE];
/* DMA transfer status flag, shared between HAL_ADC_ConvCpltCallback() and main():
 *   0: DMA transfer is not completed
 *   1: DMA transfer is completed
 *   2: DMA transfer has not been started yet (initial state)
 */
__IO uint8_t ubDmaTransferStatus = 2U;

/* USER CODE END PD */

/* Private macro -------------------------------------------------------------*/
/* USER CODE BEGIN PM */

/* USER CODE END PM */

/* Private variables ---------------------------------------------------------*/
/* ADC handle used by main() for calibration and for HAL_ADC_Start_DMA(). */
ADC_HandleTypeDef hadc2;
/* DMA channel handle; not referenced directly in the code shown here
 * (presumably configured by the ADC/GPDMA init code -- confirm). */
DMA_HandleTypeDef handle_GPDMA1_Channel0;

/* TIM6 handle; not referenced directly in the code shown here
 * (presumably configured by MX_TIM6_Init() -- confirm its role). */
TIM_HandleTypeDef htim6;

/* UART handle used by __io_putchar() to transmit characters. */
UART_HandleTypeDef huart4;

/* USER CODE BEGIN PV */

/* USER CODE END PV */

/* Private function prototypes -----------------------------------------------*/
/* Initialization routines called once from main(), in this order.
 * Their definitions are not part of the excerpt shown here. */
static void MPU_Config(void);
static void MX_GPIO_Init(void);
static void MX_GPDMA1_Init(void);
static void MX_TIM6_Init(void);
static void MX_UART4_Init(void);
static void MX_ADC2_Init(void);
/* USER CODE BEGIN PFP */

/* USER CODE END PFP */

/* Private user code ---------------------------------------------------------*/
/* USER CODE BEGIN 0 */

/* USER CODE BEGIN PFP */

/**
  * @brief  Transmit a single character over UART4 using a blocking HAL call.
  * @note   Presumably invoked by the C library's low-level write stub so that
  *         printf() output is routed to UART4 -- confirm against syscalls.c.
  * @param  ch  Character to send; only the first byte in memory of this int
  *             is transmitted.
  * @retval The character that was passed in (the HAL status is not checked).
  */
int __io_putchar(int ch)
{
    uint8_t *const pByte = (uint8_t *)&ch;

    /* One byte, timeout value 0xFFFF as accepted by HAL_UART_Transmit(). */
    HAL_UART_Transmit(&huart4, pByte, 1U, 0xFFFFU);

    return ch;
}

/* USER CODE END PFP */

/**
  * @brief  ADC conversion-complete callback: flags the DMA transfer as done.
  * @note   Sets ubDmaTransferStatus to 1; main() polls this flag and resets it
  *         to 0 before processing the samples.
  * @param  hadc  ADC handle that raised the callback (not used here).
  * @retval None
  */
void HAL_ADC_ConvCpltCallback(ADC_HandleTypeDef *hadc)
{
  (void)hadc;

  ubDmaTransferStatus = 1U;
}
/* Loop index shared by the buffer loops in main(). */
int tmp_index;

/* FFT length tied to the ADC acquisition size. */
#define FFT_LENGTH ADC_CONVERTED_DATA_BUFFER_SIZE
/* NOTE(review): fft_inputbuf and fft_outputbuf are not referenced in the code
 * shown here -- confirm whether they are still needed. */
float fft_inputbuf[FFT_LENGTH * 2];
float fft_outputbuf[FFT_LENGTH];

/* Transform direction passed to arm_rfft_fast_f32(); arm_rfft_f32_app2() sets it to 0. */
uint32_t ifftFlag = 0;
/* NOTE(review): fftSize is not referenced in the code shown here. */
uint32_t fftSize = 0;
/* Number of samples given to arm_rfft_fast_init_f32() in arm_rfft_f32_app2(). */
#define TEST_LENGTH_SAMPLES 1024
/* Working buffers of arm_rfft_f32_app2(), each sized for 2 * TEST_LENGTH_SAMPLES floats. */
/* Output of arm_rfft_fast_f32(). */
static float32_t testOutput_f32[TEST_LENGTH_SAMPLES*2];
/* Magnitudes produced by arm_cmplx_mag_f32(). */
static float32_t testOutputMag_f32[TEST_LENGTH_SAMPLES*2];
/* Input handed to arm_rfft_fast_f32(). */
static float32_t testInput_f32[TEST_LENGTH_SAMPLES*2];
/* Per-bin phase in degrees, filled by PowerPhaseRadians_f32(). */
static float32_t Phase_f32[TEST_LENGTH_SAMPLES*2];

/**
  * @brief  Compute the phase of each complex spectrum bin, in degrees.
  * @note   Despite "Radians" in the name, the values written are degrees
  *         (atan2f() result scaled by 180/pi), in the range (-180, 180].
  * @param  _ptr          Interleaved complex input: re[0], im[0], re[1], im[1], ...
  *                       Must hold at least 2 * _usFFTPoints floats.
  * @param  _phase        Output array with at least _usFFTPoints entries.
  *                       Results are written here (previously this argument was
  *                       ignored and a file-scope buffer was written instead).
  * @param  _usFFTPoints  Number of complex bins to process.
  * @param  _uiCmpValue   Magnitude threshold: bins whose magnitude is below this
  *                       value get a phase of 0, since the phase of a near-zero
  *                       bin is dominated by noise.
  * @retval None. Does nothing if _ptr or _phase is a null pointer.
  */
void PowerPhaseRadians_f32(float32_t *_ptr, float32_t *_phase, uint16_t _usFFTPoints, float32_t _uiCmpValue)
{
	uint16_t i;

	if (!_ptr || !_phase)
	{
		return;
	}

	for (i = 0; i < _usFFTPoints; i++)
	{
		const float32_t lX = _ptr[2 * i];      /* real part */
		const float32_t lY = _ptr[2 * i + 1];  /* imaginary part */
		float32_t mag;

		/* Magnitude of the bin; the sum of squares is never negative. */
		arm_sqrt_f32((float32_t)(lX * lX + lY * lY), &mag);

		if (_uiCmpValue > mag)
		{
			_phase[i] = 0;
		}
		else
		{
			/* atan2f() yields radians in (-pi, pi]; convert to degrees.
			 * Only evaluated for bins that pass the threshold. */
			_phase[i] = atan2f(lY, lX) * 180.0f / 3.1415926f;
		}
	}
}

static void arm_rfft_f32_app2(void)
{
	uint16_t i;
	arm_rfft_fast_instance_f32 S;
	//正变换
    ifftFlag = 0;
	//初始化结构体S中的参数
 	arm_rfft_fast_init_f32(&S, TEST_LENGTH_SAMPLES);
	for(i=0; i<TEST_LENGTH_SAMPLES; i++)
	{
		//测试波形
		//testInput_f32[i] = 1 + cos(2*3.1415926f*50*i/1024 + 3.1415926f/3);
		testInput_f32[i * 2] = uhADCxConvertedData_Voltage_mVolt[i];
		testInput_f32[i * 2 + 1] = 0;//虚部赋值,固定为0.
	}
	//1024点实序列快速变换
	arm_rfft_fast_f32(&S, testInput_f32, testOutput_f32, ifftFlag);
 	arm_cmplx_mag_f32(testOutput_f32, testOutputMag_f32, TEST_LENGTH_SAMPLES);
	PowerPhaseRadians_f32(testOutput_f32, Phase_f32, TEST_LENGTH_SAMPLES, 0.5f);
	//串口打印求解的幅频和相频
	for(i=0; i<TEST_LENGTH_SAMPLES; i++)
	{
		//printf("%f, %f\r\n", testOutputMag_f32[i], Phase_f32[i]);
	}
}

/* USER CODE END 0 */

/**
  * @brief  The application entry point.
  * @note   After peripheral init and ADC calibration, the loop repeatedly starts
  *         a DMA acquisition of ADC_CONVERTED_DATA_BUFFER_SIZE samples, waits
  *         50 ms, and, if the conversion-complete callback has fired, converts
  *         all samples to voltages, runs arm_rfft_f32_app2() once on the full
  *         buffer, and prints the CPU cycles that call consumed.
  *         The cycle count covers the whole arm_rfft_f32_app2() call (buffer
  *         fill, FFT, magnitude and phase), not arm_rfft_fast_f32() alone.
  * @retval int
  */
int main(void)
{

  /* USER CODE BEGIN 1 */

  /* USER CODE END 1 */

  /* MPU Configuration--------------------------------------------------------*/
  MPU_Config();

  /* MCU Configuration--------------------------------------------------------*/

  /* Update SystemCoreClock variable according to RCC registers values. */
  SystemCoreClockUpdate();

  /* Reset of all peripherals, Initializes the Flash interface and the Systick. */
  HAL_Init();

  /* USER CODE BEGIN Init */

  init_cycle_counter(true);

  /* USER CODE END Init */

  /* USER CODE BEGIN SysInit */

  /* USER CODE END SysInit */

  /* Initialize all configured peripherals */
  MX_GPIO_Init();
  MX_GPDMA1_Init();
  MX_TIM6_Init();
  MX_UART4_Init();
  MX_ADC2_Init();
  /* USER CODE BEGIN 2 */

  printf("ADC DMA DEMO\n");

  /* Pre-fill the raw buffer with the init marker value before the first acquisition. */
  for (tmp_index = 0; tmp_index < ADC_CONVERTED_DATA_BUFFER_SIZE; tmp_index++)
  {
    uhADCxConvertedData[tmp_index] = VAR_CONVERTED_DATA_INIT_VALUE;
  }

  if (HAL_ADCEx_Calibration_Start(&hadc2, ADC_SINGLE_ENDED) != HAL_OK)
  {
    printf("ADC Calibration Err\n");
    Error_Handler();
  }
  printf("ADC Calibration End\n");

  /* USER CODE END 2 */

  /* Infinite loop */
  /* USER CODE BEGIN WHILE */
  while (1)
  {
    /* USER CODE END WHILE */

    /* USER CODE BEGIN 3 */

    HAL_GPIO_TogglePin(LD4_GPIO_Port, LD4_Pin);

    if (HAL_ADC_Start_DMA(&hadc2, (uint32_t *)uhADCxConvertedData, ADC_CONVERTED_DATA_BUFFER_SIZE) != HAL_OK)
    {
      Error_Handler();
    }
    HAL_Delay(50);

    if (ubDmaTransferStatus == 1)
    {
      ubDmaTransferStatus = 0;

      /* Convert the complete buffer first; the FFT needs every sample. */
      for (tmp_index = 0; tmp_index < ADC_CONVERTED_DATA_BUFFER_SIZE; tmp_index++)
      {
        uhADCxConvertedData_Voltage_mVolt[tmp_index] = __LL_ADC_CALC_DATA_TO_VOLTAGE(VDDA_APPLI, uhADCxConvertedData[tmp_index], LL_ADC_RESOLUTION_12B);
      }

      /* Time exactly one FFT pass. The counter must bracket the call being
       * measured; an empty start/stop pair only reports the counter overhead. */
      start_cycle_counter();
      arm_rfft_f32_app2();
      const int64_t lCycleUsed = stop_cycle_counter();

      printf("cycle counter = %lld\n", (long long)lCycleUsed);
    }
  }
  /* USER CODE END 3 */
}



主要用到的函数是arm_rfft_fast_f32,函数原型是

void arm_rfft_fast_f32(
  const arm_rfft_fast_instance_f32 * S,
  float32_t * p,
  float32_t * pOut,
  uint8_t ifftFlag)

这个函数用于单精度浮点实数FFT,函数有4个参数:

S是FFT实例化句柄,调用函数arm_rfft_fast_init_f32初始化得到,然后供此函数arm_rfft_fast_f32调用。支持32, 64, 128, 256, 512, 1024, 2048, 4096点FFT。
比如做1024点FFT,代码如下:

arm_rfft_fast_instance_f32 S;

arm_rfft_fast_init_f32(&S, 1024);

arm_rfft_fast_f32(&S, testInput_f32, testOutput_f32, ifftFlag);

p是实数输入缓冲区的地址,比如我们要做1024点实数FFT,要保证该缓冲区有1024个float32_t元素(只存放实数采样值,不需要插入虚部)。注意该函数在计算过程中会修改输入缓冲区的内容。

pOut是FFT转换结果,转换结果不是实数了,而是复数,按照实部,虚部,实部,虚部,依次排列。比如做1024点FFT,这里的输出也会有1024个数据,即512个复数。

ifftFlag用于设置正变换和逆变换,ifftFlag=0表示正变换,ifftFlag=1表示逆变换。

 

测试结果:

屏幕截图 2024-11-11 204833.png

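调用arm_rfft_fast_f32计算1024点FFT耗时184个时钟周期,主频配置的是600MHz,即184/600MHz≈0.307us,下图是大佬测试的STM32F4和F1的数据,按此数值计算,1024点FFT性能可以说是F4的369.25/0.307≈1202倍。(注:这184个周期是在start_cycle_counter()与stop_cycle_counter()之间没有实际包含FFT调用的情况下读到的,基本只是计数器自身的开销;要得到真实的FFT耗时,需要把arm_rfft_f32_app2()放在这两个调用之间重新测量,因此上面的倍数仅供参考。)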

屏幕截图 2024-11-11 211639.png

 

回复评论 (3)

1024点数据进行FFT运算看起来有点麻烦,整这么多代码

点赞  2024-11-12 07:28

1024点数据进行FFT运算看起来有点麻烦,整这么多代码

点赞  2024-11-12 08:32
引用: 王1979 发表于 2024-11-12 08:32 **** 作者被禁止或删除 内容自动屏蔽 ****

算法调用不麻烦

点赞  2024-11-14 17:16
电子工程世界版权所有 京B2-20211791 京ICP备10001474号-1 京公网安备 11010802033920号
    写回复