mirror of
https://github.com/sipeed/Maixduino.git
synced 2026-03-06 10:07:01 +01:00
add speech recognition lib
This commit is contained in:
@@ -0,0 +1,20 @@
|
||||
#include "Maix_Speech_Recognition.h"
|
||||
|
||||
SpeechRecognizer rec;
|
||||
|
||||
void setup()
|
||||
{
|
||||
rec.begin();
|
||||
Serial.begin(115200);
|
||||
Serial.println("start rec...");
|
||||
if( rec.record(0, 0) == 0) //keyword_num, model_num
|
||||
{
|
||||
rec.print_model(0, 0);
|
||||
}
|
||||
else
|
||||
Serial.println("rec failed");
|
||||
}
|
||||
|
||||
void loop()
|
||||
{
|
||||
}
|
||||
@@ -0,0 +1,71 @@
|
||||
#include "Maix_Speech_Recognition.h"
|
||||
#include "voice_model.h"
|
||||
|
||||
SpeechRecognizer rec;
|
||||
|
||||
void setup()
|
||||
{
|
||||
pinMode(LED_RED, OUTPUT);
|
||||
pinMode(LED_BLUE, OUTPUT);
|
||||
pinMode(LED_GREEN, OUTPUT);
|
||||
rec.begin();
|
||||
Serial.begin(115200);
|
||||
Serial.println("init model...");
|
||||
rec.addVoiceModel(0, 0, hongse_0, fram_num_hongse_0); //36
|
||||
rec.addVoiceModel(0, 1, hongse_1, fram_num_hongse_1); //20
|
||||
rec.addVoiceModel(0, 2, hongse_2, fram_num_hongse_2); //23
|
||||
rec.addVoiceModel(0, 3, hongse_3, fram_num_hongse_3); //21
|
||||
rec.addVoiceModel(1, 0, lvse_0, fram_num_lvse_0); //20
|
||||
rec.addVoiceModel(1, 1, lvse_1, fram_num_lvse_1); //37
|
||||
rec.addVoiceModel(1, 2, lvse_2, fram_num_lvse_2); //19
|
||||
rec.addVoiceModel(1, 3, lvse_3, fram_num_lvse_3); //26
|
||||
rec.addVoiceModel(2, 0, lanse_0, fram_num_lanse_0); //19
|
||||
rec.addVoiceModel(2, 1, lanse_1, fram_num_lanse_1); //21
|
||||
rec.addVoiceModel(2, 2, lanse_2, fram_num_lanse_2); //28
|
||||
rec.addVoiceModel(2, 3, lanse_3, fram_num_lanse_3); //22
|
||||
rec.addVoiceModel(3, 0, guandeng_0, fram_num_guandeng_0); //26
|
||||
rec.addVoiceModel(3, 1, guandeng_1, fram_num_guandeng_1); //29
|
||||
rec.addVoiceModel(3, 2, guandeng_2, fram_num_guandeng_2); //33
|
||||
rec.addVoiceModel(3, 3, guandeng_3, fram_num_guandeng_3); //29
|
||||
Serial.println("init model ok!");
|
||||
}
|
||||
void loop()
|
||||
{
|
||||
int res;
|
||||
res = rec.recognize();
|
||||
Serial.printf("res : %d ", res);
|
||||
if (res > 0){
|
||||
switch (res)
|
||||
{
|
||||
case 1:
|
||||
digitalWrite(LED_RED, LOW); //power on red led
|
||||
digitalWrite(LED_GREEN, HIGH);
|
||||
digitalWrite(LED_BLUE, HIGH);
|
||||
Serial.println("rec : hongse ");
|
||||
break;
|
||||
case 2:
|
||||
digitalWrite(LED_GREEN, LOW); //power on green led
|
||||
digitalWrite(LED_RED, HIGH);
|
||||
digitalWrite(LED_BLUE, HIGH);
|
||||
Serial.println("rec : lvse ");
|
||||
break;
|
||||
case 3:
|
||||
digitalWrite(LED_BLUE, LOW); //power on blue led
|
||||
digitalWrite(LED_RED, HIGH);
|
||||
digitalWrite(LED_GREEN, HIGH);
|
||||
Serial.println("rec : lanse ");
|
||||
break;
|
||||
case 4:
|
||||
digitalWrite(LED_RED, HIGH);
|
||||
digitalWrite(LED_GREEN, HIGH);
|
||||
digitalWrite(LED_BLUE, HIGH); //power off all leds
|
||||
Serial.println("rec : guandeng ");
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}else
|
||||
{
|
||||
Serial.println("recognize failed.");
|
||||
}
|
||||
delay(1000);
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
9
libraries/Maix_Speech_Recognition/library.properties
Normal file
9
libraries/Maix_Speech_Recognition/library.properties
Normal file
@@ -0,0 +1,9 @@
|
||||
name=Maix_Speech_Recognition
|
||||
version=1.0
|
||||
author=BigBits
|
||||
maintainer=BigBits<bigbits@hackx.cc>
|
||||
sentence=Maix Boards Speech Recognition Lib
|
||||
paragraph=Maix Boards Speech Recognition Lib
|
||||
category=Uncategorized
|
||||
url=
|
||||
architectures=k210
|
||||
@@ -0,0 +1,385 @@
|
||||
#include "Maix_Speech_Recognition.h"
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "sysctl.h"
|
||||
#include "plic.h"
|
||||
#include "uarths.h"
|
||||
#include "util/g_def.h"
|
||||
#include "i2s.h"
|
||||
#include "fpioa.h"
|
||||
|
||||
#include "util/VAD.h"
|
||||
#include "util/MFCC.h"
|
||||
#include "util/DTW.h"
|
||||
#include "util/flash.h"
|
||||
#include "util/ADC.h"
|
||||
|
||||
#define USART1_printf printf
|
||||
|
||||
uint16_t VcBuf[atap_len];
|
||||
atap_tag atap_arg;
|
||||
valid_tag valid_voice[max_vc_con];
|
||||
v_ftr_tag ftr;
|
||||
v_ftr_tag ftr_temp;
|
||||
v_ftr_tag ftr_mdl_temp[10];
|
||||
v_ftr_tag *pftr_mdl_temp[10];
|
||||
|
||||
#define save_ok 0
|
||||
#define VAD_fail 1
|
||||
#define MFCC_fail 2
|
||||
#define Flash_fail 3
|
||||
|
||||
#define FFT_N 512
|
||||
|
||||
uint16_t rx_buf[FRAME_LEN];
|
||||
uint32_t g_rx_dma_buf[FRAME_LEN * 2];
|
||||
uint64_t fft_out_data[FFT_N / 2];
|
||||
|
||||
volatile uint32_t g_index;
|
||||
volatile uint8_t uart_rec_flag;
|
||||
volatile uint32_t receive_char;
|
||||
volatile uint8_t i2s_rec_flag;
|
||||
volatile uint8_t i2s_start_flag = 0;
|
||||
|
||||
int i2s_dma_irq(void *ctx)
|
||||
{
|
||||
uint32_t i;
|
||||
if(i2s_start_flag)
|
||||
{
|
||||
int16_t s_tmp;
|
||||
if(g_index)
|
||||
{
|
||||
i2s_receive_data_dma(I2S_DEVICE_0, &g_rx_dma_buf[g_index], frame_mov * 2, DMAC_CHANNEL3);
|
||||
g_index = 0;
|
||||
for(i = 0; i < frame_mov; i++)
|
||||
{
|
||||
s_tmp = (int16_t)(g_rx_dma_buf[2 * i] & 0xffff); //g_rx_dma_buf[2 * i + 1] Low left
|
||||
rx_buf[i] = s_tmp + 32768;
|
||||
}
|
||||
i2s_rec_flag = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
i2s_receive_data_dma(I2S_DEVICE_0, &g_rx_dma_buf[0], frame_mov * 2, DMAC_CHANNEL3);
|
||||
g_index = frame_mov * 2;
|
||||
for(i = frame_mov; i < frame_mov * 2; i++)
|
||||
{
|
||||
s_tmp = (int16_t)(g_rx_dma_buf[2 * i] & 0xffff);//g_rx_dma_buf[2 * i + 1] Low left
|
||||
rx_buf[i] = s_tmp + 32768;
|
||||
}
|
||||
i2s_rec_flag = 2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
i2s_receive_data_dma(I2S_DEVICE_0, &g_rx_dma_buf[0], frame_mov * 2, DMAC_CHANNEL3);
|
||||
g_index = frame_mov * 2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
SpeechRecognizer::SpeechRecognizer(){
|
||||
|
||||
}
|
||||
|
||||
SpeechRecognizer::~SpeechRecognizer(){
|
||||
|
||||
}
|
||||
|
||||
int
|
||||
SpeechRecognizer::begin()
|
||||
{
|
||||
//io_mux_init
|
||||
fpioa_set_function(20, FUNC_I2S0_IN_D0);
|
||||
fpioa_set_function(18, FUNC_I2S0_SCLK);
|
||||
fpioa_set_function(19, FUNC_I2S0_WS);
|
||||
|
||||
//i2s init
|
||||
i2s_init(I2S_DEVICE_0, I2S_RECEIVER, 0x3);
|
||||
|
||||
i2s_rx_channel_config(I2S_DEVICE_0, I2S_CHANNEL_0,
|
||||
RESOLUTION_16_BIT, SCLK_CYCLES_32,
|
||||
TRIGGER_LEVEL_4, STANDARD_MODE);
|
||||
|
||||
i2s_set_sample_rate(I2S_DEVICE_0, 8000);
|
||||
|
||||
dmac_init();
|
||||
dmac_set_irq(DMAC_CHANNEL3, i2s_dma_irq, NULL, 3);
|
||||
i2s_receive_data_dma(I2S_DEVICE_0, &g_rx_dma_buf[0], frame_mov * 2, DMAC_CHANNEL3);
|
||||
|
||||
/* Enable the machine interrupt */
|
||||
sysctl_enable_irq();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
SpeechRecognizer::record(uint8_t keyword_num, uint8_t model_num)
|
||||
{
|
||||
if (keyword_num > 10) return -1;
|
||||
if (model_num > 4) return -2;
|
||||
|
||||
comm = keyword_num;
|
||||
uint8_t prc_count = model_num;
|
||||
uint32_t addr = 0;
|
||||
|
||||
g_index = 0;
|
||||
i2s_rec_flag = 0;
|
||||
i2s_start_flag = 1;
|
||||
|
||||
addr = ftr_start_addr + comm * size_per_comm + prc_count * size_per_ftr;
|
||||
|
||||
if (save_mdl(VcBuf, addr) == save_ok) {
|
||||
return 0;
|
||||
}else{
|
||||
return -3;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int
|
||||
SpeechRecognizer::recognize()
|
||||
{
|
||||
u8 res;
|
||||
u32 dis;
|
||||
u32 recg_count = 0;
|
||||
|
||||
g_index = 0;
|
||||
i2s_rec_flag = 0;
|
||||
i2s_start_flag = 1;
|
||||
|
||||
res = spch_recg(VcBuf, &dis);
|
||||
if(dis != dis_err)
|
||||
return res;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
int
|
||||
SpeechRecognizer::addVoiceModel(uint8_t keyword_num, uint8_t model_num, const int16_t *voice_model, uint16_t frame_num)
|
||||
{
|
||||
ftr_save[keyword_num * 4 + model_num].save_sign = save_mask;
|
||||
ftr_save[keyword_num * 4 + model_num].frm_num = frame_num;
|
||||
|
||||
for (int i = 0; i < (vv_frm_max * mfcc_num); i++)
|
||||
ftr_save[keyword_num * 4 + model_num].mfcc_dat[i] = voice_model[i];
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
SpeechRecognizer::print_model(uint8_t keyword_num, uint8_t model_num)
|
||||
{
|
||||
printf("frm_num=%d\n", ftr_save[keyword_num*4 + model_num].frm_num);
|
||||
for (int i = 0; i < (vv_frm_max * mfcc_num); i++) {
|
||||
if (((i + 1) % 35) == 0)
|
||||
printf("%d,\n", ftr_save[keyword_num*4 + model_num].mfcc_dat[i]);
|
||||
else
|
||||
printf("%d, ", ftr_save[keyword_num*4 + model_num].mfcc_dat[i]);
|
||||
}
|
||||
printf("\nprint model ok!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint8_t SpeechRecognizer::save_mdl(uint16_t *v_dat, uint32_t addr)
|
||||
{
|
||||
u16 i, num;
|
||||
u16 frame_index;
|
||||
|
||||
get_noise1:
|
||||
frame_index = 0;
|
||||
num = atap_len / frame_mov;
|
||||
//wait for finish
|
||||
while (1) {
|
||||
while (i2s_rec_flag == 0)
|
||||
continue;
|
||||
if (i2s_rec_flag == 1) {
|
||||
for (i = 0; i < frame_mov; i++)
|
||||
v_dat[frame_mov * frame_index + i] = rx_buf[i];
|
||||
} else {
|
||||
for (i = 0; i < frame_mov; i++)
|
||||
v_dat[frame_mov * frame_index + i] = rx_buf[i + frame_mov];
|
||||
}
|
||||
i2s_rec_flag = 0;
|
||||
frame_index++;
|
||||
if (frame_index >= num)
|
||||
break;
|
||||
}
|
||||
// for(i = 0; i < atap_len; i++)
|
||||
// printf("noise: %d\n", v_dat[i]);
|
||||
noise_atap(v_dat, atap_len, &atap_arg);
|
||||
if (atap_arg.s_thl > 10000) {
|
||||
printf("get noise again...\n");
|
||||
goto get_noise1;
|
||||
}
|
||||
printf("speeking...\n");
|
||||
//wait for finish
|
||||
while (i2s_rec_flag == 0)
|
||||
continue;
|
||||
if (i2s_rec_flag == 1) {
|
||||
for (i = 0; i < frame_mov; i++)
|
||||
v_dat[i + frame_mov] = rx_buf[i];
|
||||
} else {
|
||||
for (i = 0; i < frame_mov; i++)
|
||||
v_dat[i + frame_mov] = rx_buf[i + frame_mov];
|
||||
}
|
||||
i2s_rec_flag = 0;
|
||||
while (1) {
|
||||
while (i2s_rec_flag == 0)
|
||||
continue;
|
||||
if (i2s_rec_flag == 1) {
|
||||
for (i = 0; i < frame_mov; i++) {
|
||||
v_dat[i] = v_dat[i + frame_mov];
|
||||
v_dat[i + frame_mov] = rx_buf[i];
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < frame_mov; i++) {
|
||||
v_dat[i] = v_dat[i + frame_mov];
|
||||
v_dat[i + frame_mov] = rx_buf[i + frame_mov];
|
||||
}
|
||||
}
|
||||
i2s_rec_flag = 0;
|
||||
if (VAD2(v_dat, valid_voice, &atap_arg) == 1)
|
||||
break;
|
||||
if (receive_char == 's')
|
||||
return MFCC_fail;
|
||||
}
|
||||
// if (valid_voice[0].end == ((void *)0)) {
|
||||
// printf("VAD_fail\n");
|
||||
// return VAD_fail;
|
||||
// }
|
||||
|
||||
get_mfcc(&(valid_voice[0]), &ftr, &atap_arg);
|
||||
if (ftr.frm_num == 0) {
|
||||
//printf("MFCC_fail\n");
|
||||
return MFCC_fail;
|
||||
}
|
||||
// ftr.word_num = valid_voice[0].word_num;
|
||||
return save_ftr_mdl(&ftr, addr);
|
||||
// ftr_mdl_temp[addr] = ftr;
|
||||
// return save_ok;
|
||||
}
|
||||
|
||||
uint8_t SpeechRecognizer::spch_recg(uint16_t *v_dat, uint32_t *mtch_dis)
|
||||
{
|
||||
u16 i;
|
||||
u32 ftr_addr;
|
||||
u32 min_dis;
|
||||
u16 min_comm;
|
||||
u32 cur_dis;
|
||||
v_ftr_tag *ftr_mdl;
|
||||
u16 num;
|
||||
u16 frame_index;
|
||||
uint32_t cycle0, cycle1;
|
||||
|
||||
get_noise2:
|
||||
frame_index = 0;
|
||||
num = atap_len / frame_mov;
|
||||
//wait for finish
|
||||
i2s_rec_flag = 0;
|
||||
while (1) {
|
||||
while (i2s_rec_flag == 0)
|
||||
continue;
|
||||
if (i2s_rec_flag == 1) {
|
||||
for (i = 0; i < frame_mov; i++)
|
||||
v_dat[frame_mov * frame_index + i] = rx_buf[i];
|
||||
} else {
|
||||
for (i = 0; i < frame_mov; i++)
|
||||
v_dat[frame_mov * frame_index + i] = rx_buf[i + frame_mov];
|
||||
}
|
||||
i2s_rec_flag = 0;
|
||||
frame_index++;
|
||||
if (frame_index >= num)
|
||||
break;
|
||||
}
|
||||
noise_atap(v_dat, atap_len, &atap_arg);
|
||||
if (atap_arg.s_thl > 10000) {
|
||||
printf("get noise again...\n");
|
||||
goto get_noise2;
|
||||
}
|
||||
|
||||
printf("speeking...\n");
|
||||
|
||||
//wait for finish
|
||||
while (i2s_rec_flag == 0)
|
||||
continue;
|
||||
if (i2s_rec_flag == 1) {
|
||||
for (i = 0; i < frame_mov; i++)
|
||||
v_dat[i + frame_mov] = rx_buf[i];
|
||||
} else {
|
||||
for (i = 0; i < frame_mov; i++)
|
||||
v_dat[i + frame_mov] = rx_buf[i + frame_mov];
|
||||
}
|
||||
i2s_rec_flag = 0;
|
||||
while (1) {
|
||||
while (i2s_rec_flag == 0)
|
||||
continue;
|
||||
if (i2s_rec_flag == 1) {
|
||||
for (i = 0; i < frame_mov; i++) {
|
||||
v_dat[i] = v_dat[i + frame_mov];
|
||||
v_dat[i + frame_mov] = rx_buf[i];
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < frame_mov; i++) {
|
||||
v_dat[i] = v_dat[i + frame_mov];
|
||||
v_dat[i + frame_mov] = rx_buf[i + frame_mov];
|
||||
}
|
||||
}
|
||||
i2s_rec_flag = 0;
|
||||
if (VAD2(v_dat, valid_voice, &atap_arg) == 1)
|
||||
break;
|
||||
if (receive_char == 's') {
|
||||
*mtch_dis = dis_err;
|
||||
printf("send 'c' to start\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
printf("vad ok\n");
|
||||
// if (valid_voice[0].end == ((void *)0)) {
|
||||
// *mtch_dis=dis_err;
|
||||
// USART1_printf("VAD fail ");
|
||||
// return (void *)0;
|
||||
// }
|
||||
|
||||
get_mfcc(&(valid_voice[0]), &ftr, &atap_arg);
|
||||
if (ftr.frm_num == 0) {
|
||||
*mtch_dis = dis_err;
|
||||
USART1_printf("MFCC fail ");
|
||||
return 0;
|
||||
}
|
||||
// for (i = 0; i < ftr.frm_num * mfcc_num; i++) {
|
||||
// if (i % 12 == 0)
|
||||
// printf("\n");
|
||||
// printf("%d ", ftr.mfcc_dat[i]);
|
||||
// }
|
||||
// ftr.word_num = valid_voice[0].word_num;
|
||||
printf("mfcc ok\n");
|
||||
i = 0;
|
||||
min_comm = 0;
|
||||
min_dis = dis_max;
|
||||
cycle0 = read_csr(mcycle);
|
||||
for (ftr_addr = ftr_start_addr; ftr_addr < ftr_end_addr; ftr_addr += size_per_ftr) {
|
||||
// ftr_mdl=(v_ftr_tag*)ftr_addr;
|
||||
ftr_mdl = (v_ftr_tag *)(&ftr_save[ftr_addr / size_per_ftr]);
|
||||
cur_dis = ((ftr_mdl->save_sign) == save_mask) ? dtw(ftr_mdl, &ftr) : dis_err;
|
||||
if ((ftr_mdl->save_sign) == save_mask) {
|
||||
USART1_printf("no. %d, frm_num = %d, save_mask=%d", i + 1, ftr_mdl->frm_num, ftr_mdl->save_sign);
|
||||
USART1_printf("cur_dis=%d\n", cur_dis);
|
||||
}
|
||||
if (cur_dis < min_dis) {
|
||||
min_dis = cur_dis;
|
||||
min_comm = i + 1;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
cycle1 = read_csr(mcycle) - cycle0;
|
||||
printf("[INFO] recg cycle = 0x%08x\n", cycle1);
|
||||
if (min_comm % 4)
|
||||
min_comm = min_comm / ftr_per_comm + 1;
|
||||
else
|
||||
min_comm = min_comm / ftr_per_comm;
|
||||
//USART1_printf("recg end ");
|
||||
*mtch_dis = min_dis;
|
||||
return (u8)min_comm;//(commstr[min_comm].str);
|
||||
}
|
||||
@@ -0,0 +1,42 @@
|
||||
#ifndef __MAIX_SPEECH_RECOGNITION_H
|
||||
#define __MAIX_SPEECH_RECOGNITION_H
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sysctl.h"
|
||||
#include "plic.h"
|
||||
#include "uarths.h"
|
||||
#include "util/g_def.h"
|
||||
#include "i2s.h"
|
||||
|
||||
#include "util/VAD.h"
|
||||
#include "util/MFCC.h"
|
||||
#include "util/DTW.h"
|
||||
#include "util/flash.h"
|
||||
#include "util/ADC.h"
|
||||
|
||||
|
||||
class SpeechRecognizer{
|
||||
public:
|
||||
SpeechRecognizer();
|
||||
~SpeechRecognizer();
|
||||
int begin(); //初始化i2s
|
||||
int record(uint8_t keyword_num, uint8_t model_num); //记录关键词
|
||||
int recognize(); //识别,返回关键词号
|
||||
int addVoiceModel(uint8_t keyword_num, uint8_t model_num, const int16_t *voice_model, uint16_t frame_num);
|
||||
int print_model(uint8_t keyword_num, uint8_t model_num);
|
||||
private:
|
||||
uint8_t save_mdl(uint16_t *v_dat, uint32_t addr);
|
||||
uint8_t spch_recg(uint16_t *v_dat, uint32_t *mtch_dis);
|
||||
|
||||
private:
|
||||
uint8_t comm; //关键词号
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
22
libraries/Maix_Speech_Recognition/src/util/ADC.h
Normal file
22
libraries/Maix_Speech_Recognition/src/util/ADC.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef _ADC_H
|
||||
#define _ADC_H
|
||||
|
||||
|
||||
#define fs 8000 //ADC采样率 Hz 8000
|
||||
#define voice_len 3000 //录音时间长度 单位ms
|
||||
#define VcBuf_Len ((fs/1000)*voice_len) //语音缓存区长度 单位点数 每个采样点16位
|
||||
#define atap_len_t 300 //背景噪音自适应时间长度 ms
|
||||
#define atap_len ((fs/1000)*atap_len_t) //背景噪音自适应长度
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void ADC_DMA_Init(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
434
libraries/Maix_Speech_Recognition/src/util/DTW.c
Normal file
434
libraries/Maix_Speech_Recognition/src/util/DTW.c
Normal file
@@ -0,0 +1,434 @@
|
||||
/******* DTW.C ********/
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "g_def.h"
|
||||
#include "ADC.h"
|
||||
#include "VAD.h"
|
||||
#include "MFCC.h"
|
||||
#include "DTW.h"
|
||||
|
||||
/*
|
||||
* DTW算法 通过局部优化的方法实现加权距离总和最小
|
||||
|
||||
时间规整函数:
|
||||
C={c(1),c(2),…,c(N)}
|
||||
N为路径长度,c(n)=(i(n),j(n))表示第n个匹配点是有参考模板的
|
||||
第i(n)个特征矢量与待测模板的第j(n)个特征矢量构成的匹配点对,两
|
||||
者之间的距离d(x(i(n)),y(j(n)))称为匹配距离。
|
||||
时间规整函数满足一下约束:
|
||||
1.单调性,规整函数单调增加。
|
||||
2.起点终点约束,起点对起点,终点对终点。
|
||||
3.连续性,不允许跳过任何一点。
|
||||
4.最大规整量不超过某一极限值。|i(n)-j(n)|<M,M为窗宽。规整
|
||||
函数所处的区域位于平行四边形内。局部路径约束,用于限制当第n步
|
||||
为(i(n),j(n))时,前几步存在几种可能的路径。
|
||||
|
||||
DTW步骤:
|
||||
1.初始化。令i(0)=j(0)=0,i(N)=in_frm_num,j(N)=mdl_frm_num.
|
||||
确定一个平行四边形,有两个位于(0,0)和(in_frm_num,mdl_frm_num)的顶点,相邻斜边斜
|
||||
率分别是2和1/2.规整函数不可超出此平行四边形。
|
||||
2.递推求累计距离。
|
||||
|
||||
若输入特征与特征模板的帧数差别过大,直接将匹配距离设为最大
|
||||
frm_in_num<(frm_mdl_num/2)||frm_in_num>(frm_mdl_num*2)
|
||||
*/
|
||||
int64_t avr_in[mfcc_num];
|
||||
int64_t avr_mdl[mfcc_num];
|
||||
|
||||
int64_t standard_in[mfcc_num];
|
||||
int64_t standard_mdl[mfcc_num];
|
||||
int dtw_data[vv_frm_max*vv_frm_max];
|
||||
|
||||
struct pointOritation//节点方向,用来回溯每个W点
|
||||
{
|
||||
int frontI, frontJ;
|
||||
};
|
||||
int g[vv_frm_max][vv_frm_max];
|
||||
struct pointOritation pOritation[vv_frm_max][vv_frm_max];//用来存放
|
||||
|
||||
void gArray(int *p, int n, int m, int *g, struct pointOritation *pr)
|
||||
{
|
||||
int i = 0, j = 0;
|
||||
|
||||
*g = (*p) * 2;//起始点(最左上角的点)
|
||||
for (i = 1; i < m; i++) {//最上面一横
|
||||
*(g + i) = *(g + i - 1) + *(p + i);
|
||||
(pr + i)->frontI = 0;
|
||||
(pr + i)->frontJ = i - 1;
|
||||
}
|
||||
|
||||
for (i = 1; i < n; i++) {//最左边的一竖
|
||||
*(g+i*m+0) = *(g+(i-1)*m+0)+(*(p+i*m+0));
|
||||
(pr+i*m+0)->frontI = i-1;
|
||||
(pr+i*m+0)->frontJ = 0;
|
||||
}
|
||||
|
||||
//计算剩余网格的G值
|
||||
for (i = 1; i < n; i++) {
|
||||
for (j = 1; j < m; j++) {
|
||||
int left, up, incline;
|
||||
|
||||
left = *(g + i*m+j-1) + *(p + i * m + j);
|
||||
up = *(g + (i - 1) * m + j) + *(p + i * m + j);
|
||||
incline = *(g + (i - 1) * m + j - 1) + (*(p + i * m + j)) * 2;
|
||||
|
||||
//从左、上、斜三个方向选出最小的
|
||||
int min = left;
|
||||
|
||||
*(g + i * m + j) = min;
|
||||
(pr + i * m + j)->frontI = i;
|
||||
(pr + i * m + j)->frontJ = j - 1;
|
||||
|
||||
if (min > up) {
|
||||
min = up;
|
||||
*(g + i * m + j) = min;
|
||||
(pr + i * m + j)->frontI = i - 1;
|
||||
(pr + i * m + j)->frontJ = j;
|
||||
}
|
||||
if (min > incline) {
|
||||
min = incline;
|
||||
*(g + i * m + j) = min;
|
||||
(pr + i * m + j)->frontI = i - 1;
|
||||
(pr + i * m + j)->frontJ = j - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
//输出G数组
|
||||
for (i = 0; i < n; i++) {
|
||||
for (j = 0; j < m; j++)
|
||||
printf("%d, ", *(g+i*m+j));
|
||||
printf("\n");
|
||||
}
|
||||
//输出方向数组
|
||||
for (i = 0; i < n; i++) {
|
||||
for (j = 0; j < m; j++)
|
||||
printf("(%d,%d), ", (pr+i*m+j)->frontI, (pr+i*m+j)->frontJ);
|
||||
printf("\n");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
int printPath(struct pointOritation *po, int n, int m, int *g)
|
||||
{
|
||||
//从最后一个点向前输出路径节点
|
||||
int i = n-1, j = m - 1;
|
||||
int step = 0;
|
||||
|
||||
while (1) {
|
||||
int ii = (po + i * m + j)->frontI, jj = (po + i * m + j)->frontJ;
|
||||
|
||||
if (i == 0 && j == 0)
|
||||
break;
|
||||
// printf("(%d,%d):%d\n",i,j,*(g+i*m+j));
|
||||
i = ii;
|
||||
j = jj;
|
||||
step++;
|
||||
}
|
||||
// printf("distance1 = %d\n", *(g+(n-1)*m + m -1) / step);
|
||||
return step;
|
||||
//printf("distance = %d\n", *(g+(n-1)*m + m -1) / step);
|
||||
}
|
||||
|
||||
/*
|
||||
* 获取两个特征矢量之间的距离
|
||||
参数
|
||||
frm_ftr1 特征矢量1
|
||||
frm_ftr2 特征矢量2
|
||||
返回值
|
||||
dis 矢量距离
|
||||
*/
|
||||
u32 get_dis(s16 *frm_ftr1, s16 *frm_ftr2)
|
||||
{
|
||||
u8 i;
|
||||
|
||||
#if 0
|
||||
#if 1
|
||||
u32 dis;
|
||||
s32 dif; //两矢量相同维度上的差值
|
||||
|
||||
dis = 0;
|
||||
for (i = 0; i < mfcc_num; i++) {
|
||||
//USART1_printf("dis=%d ",dis);
|
||||
dif = frm_ftr1[i]-frm_ftr2[i];
|
||||
dis += (dif*dif);
|
||||
}
|
||||
//USART1_printf("dis=%d ",dis);
|
||||
dis = sqrtf(dis);
|
||||
//USART1_printf("%d\r\n",dis);
|
||||
return dis;
|
||||
#else
|
||||
u32 dis;
|
||||
s32 dif;
|
||||
s32 avr;
|
||||
|
||||
dis = 0;
|
||||
for (i = 0; i < mfcc_num; i++) {
|
||||
//USART1_printf("dis=%d ",dis);
|
||||
avr = (frm_ftr1[i]+frm_ftr2[i]) / 2;
|
||||
dif = sqrtf((frm_ftr1[i] - avr) * (frm_ftr1[i] - avr) + (frm_ftr2[i] - avr) * (frm_ftr2[i] - avr));
|
||||
|
||||
dif = (frm_ftr1[i]-frm_ftr2[i]) * 100 / dif;
|
||||
|
||||
dis += (dif*dif);
|
||||
}
|
||||
//USART1_printf("dis=%d ",dis);
|
||||
dis = sqrtf(dis);
|
||||
//USART1_printf("%d\r\n",dis);
|
||||
return dis;
|
||||
#endif
|
||||
#else
|
||||
#if 1
|
||||
int64_t dif = 0;
|
||||
int64_t dif_a = 0;
|
||||
int64_t dif_b = 0;
|
||||
int32_t dif_r = 0;
|
||||
|
||||
for (i = 0; i < mfcc_num; i++) {
|
||||
dif += (frm_ftr1[i]*frm_ftr2[i]);
|
||||
|
||||
dif_a += (frm_ftr1[i] * frm_ftr1[i]);
|
||||
dif_b += (frm_ftr2[i] * frm_ftr2[i]);
|
||||
}
|
||||
dif_r = 1000 - (dif * 1000 / sqrt(dif_a * dif_b));
|
||||
|
||||
return dif_r;
|
||||
#else
|
||||
int64_t dif = 0;
|
||||
int64_t dif_a = 0;
|
||||
int64_t dif_b = 0;
|
||||
int32_t dif_r = 0;
|
||||
|
||||
for (i = 0; i < mfcc_num; i++) {
|
||||
dif += ((frm_ftr1[i] - avr_mdl[i]) * (frm_ftr2[i] - avr_in[i]));
|
||||
dif_a += ((frm_ftr1[i] - avr_mdl[i]) * (frm_ftr1[i] - avr_mdl[i]));
|
||||
dif_b += ((frm_ftr2[i] - avr_in[i]) * (frm_ftr2[i] - avr_in[i]));
|
||||
}
|
||||
dif_r = 1000 - (dif * 1000 / (sqrt(dif_a * dif_b)));
|
||||
|
||||
return dif_r;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
//平行四边形两外两顶点 X坐标值
|
||||
static u16 X1; //上边交点
|
||||
static u16 X2; //下边交点
|
||||
static int in_frm_num; //输入特征帧数
|
||||
static int mdl_frm_num;//特征模板帧数
|
||||
|
||||
#define ins 0
|
||||
#define outs 1
|
||||
|
||||
/*
|
||||
* 范围控制
|
||||
|
||||
*/
|
||||
u8 dtw_limit(u16 x, u16 y)
|
||||
{
|
||||
if (x < X1) {
|
||||
if (y >= ((2*x)+2))
|
||||
return outs;
|
||||
} else {
|
||||
if ((2*y+in_frm_num-2*mdl_frm_num) >= (x+4))
|
||||
return outs;
|
||||
}
|
||||
|
||||
if (x < X2) {
|
||||
if ((2*y+2) <= x)
|
||||
return outs;
|
||||
} else {
|
||||
if ((y+4) <= (2*x+mdl_frm_num-2*in_frm_num))
|
||||
return outs;
|
||||
}
|
||||
|
||||
return ins;
|
||||
}
|
||||
|
||||
/*
|
||||
* DTW 动态时间规整
|
||||
参数
|
||||
ftr_in :输入特征值
|
||||
ftr_mdl :特征模版
|
||||
返回值
|
||||
dis :累计匹配距离
|
||||
*/
|
||||
|
||||
u32 dtw(v_ftr_tag *ftr_in, v_ftr_tag *frt_mdl)
|
||||
{
|
||||
u32 dis;
|
||||
// u16 x, y;
|
||||
u16 step;
|
||||
s16 *in;
|
||||
s16 *mdl;
|
||||
// u32 d_right_up, right, right_up; //up,
|
||||
// u32 min;
|
||||
int i, j;
|
||||
|
||||
in_frm_num = ftr_in->frm_num;
|
||||
mdl_frm_num = frt_mdl->frm_num;
|
||||
|
||||
if ((in_frm_num > (mdl_frm_num*3)) || ((3*in_frm_num) < mdl_frm_num)) {
|
||||
//USART1_printf("in_frm_num=%d mdl_frm_num=%d\r\n", in_frm_num,mdl_frm_num);
|
||||
return dis_err;
|
||||
} else {
|
||||
// 计算约束平行四边形顶点值
|
||||
X1 = (2*mdl_frm_num-in_frm_num)/3;
|
||||
X2 = (4*in_frm_num-2*mdl_frm_num)/3;
|
||||
in = ftr_in->mfcc_dat;
|
||||
mdl = frt_mdl->mfcc_dat;
|
||||
#if 0
|
||||
for (j = 0; j < mfcc_num; j++) {
|
||||
avr_in[j] = 0;
|
||||
avr_mdl[j] = 0;
|
||||
}
|
||||
|
||||
for (j = 0; j < mfcc_num; j++)
|
||||
for (i = 0; i < in_frm_num; i++)
|
||||
avr_in[j] += in[mfcc_num * i + j];
|
||||
for (j = 0; j < mfcc_num; j++)
|
||||
avr_in[j] = avr_in[j] / in_frm_num;
|
||||
//
|
||||
for (j = 0; j < mfcc_num; j++)
|
||||
for (i = 0; i < mdl_frm_num; i++)
|
||||
avr_mdl[j] += mdl[mfcc_num * i + j];
|
||||
for (j = 0; j < mfcc_num; j++)
|
||||
avr_mdl[j] = avr_mdl[j] / mdl_frm_num;
|
||||
|
||||
dis = get_dis(in, mdl);
|
||||
x = 1;
|
||||
y = 1;
|
||||
step = 1;
|
||||
#endif
|
||||
#if 0
|
||||
for (i = 0; i < in_frm_num; i++) {
|
||||
for (j = 0; j < mdl_frm_num; j++) {
|
||||
printf("%d,", get_dis(mdl, in));
|
||||
mdl += mfcc_num;
|
||||
}
|
||||
mdl = frt_mdl->mfcc_dat;
|
||||
in += mfcc_num;
|
||||
printf("\n");
|
||||
}
|
||||
in = ftr_in->mfcc_dat;
|
||||
mdl = frt_mdl->mfcc_dat;
|
||||
#endif
|
||||
for (i = 0; i < in_frm_num; i++) {
|
||||
for (j = 0; j < mdl_frm_num; j++) {
|
||||
//dtw_data[i][j] = get_dis(in + (i * mfcc_num), mdl + (j * mfcc_num));
|
||||
//dtw_data[i][j] = get_dis(in, mdl);
|
||||
*(dtw_data+i*mdl_frm_num+j) = get_dis(in, mdl);
|
||||
// printf("%d,", dtw_data[i*mdl_frm_num+j]);
|
||||
mdl += mfcc_num;
|
||||
}
|
||||
//printf("\n");
|
||||
mdl = frt_mdl->mfcc_dat;
|
||||
in += mfcc_num;
|
||||
}
|
||||
gArray(dtw_data, in_frm_num, mdl_frm_num, *g, *pOritation);
|
||||
step = printPath(*pOritation, in_frm_num, mdl_frm_num, *g);
|
||||
//printf("step=%d\r\n",step);
|
||||
dis = *((int *)g+(in_frm_num-1)*mdl_frm_num + mdl_frm_num - 1);
|
||||
// printf("dis=%d step=%d dis/step=%d\r\n",dis,step,dis/step);
|
||||
}
|
||||
return (dis/step); //步长归一化
|
||||
}
|
||||
|
||||
|
||||
void get_mean(s16 *frm_ftr1, s16 *frm_ftr2, s16 *mean)
|
||||
{
|
||||
u8 i;
|
||||
|
||||
for (i = 0; i < mfcc_num; i++) {
|
||||
mean[i] = (frm_ftr1[i]+frm_ftr2[i])/2;
|
||||
// printf("x=%d y=%d ", frm_ftr1[i], frm_ftr2[i]);
|
||||
// printf("mean=%d\r\n", mean[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* 从两特征矢量获取特征模板
|
||||
参数
|
||||
ftr_in1 :输入特征值
|
||||
ftr_in2 :输入特征值
|
||||
ftr_mdl :特征模版
|
||||
返回值
|
||||
dis :累计匹配距离
|
||||
*/
|
||||
|
||||
u32 get_mdl(v_ftr_tag *ftr_in1, v_ftr_tag *ftr_in2, v_ftr_tag *ftr_mdl)
|
||||
{
|
||||
u32 dis;
|
||||
u16 x, y;
|
||||
u16 step;
|
||||
s16 *in1;
|
||||
s16 *in2;
|
||||
s16 *mdl;
|
||||
u32 right, right_up, d_right_up;//up,
|
||||
u32 min;
|
||||
|
||||
in_frm_num = ftr_in1->frm_num;
|
||||
mdl_frm_num = ftr_in2->frm_num;
|
||||
|
||||
if ((in_frm_num > (mdl_frm_num*2)) || ((2*in_frm_num) < mdl_frm_num)) {
|
||||
printf("in_frm_num= %d, mdl_frm_num= %d\n", in_frm_num, mdl_frm_num);
|
||||
return dis_err;
|
||||
} else {
|
||||
// 计算约束平行四边形顶点值
|
||||
X1 = (2*mdl_frm_num-in_frm_num)/3;
|
||||
X2 = (4*in_frm_num-2*mdl_frm_num)/3;
|
||||
in1 = ftr_in1->mfcc_dat;
|
||||
in2 = ftr_in2->mfcc_dat;
|
||||
mdl = ftr_mdl->mfcc_dat;
|
||||
|
||||
dis = get_dis(in1, in2);
|
||||
get_mean(in1, in2, mdl);
|
||||
x = 1;
|
||||
y = 1;
|
||||
step = 1;
|
||||
do {
|
||||
//up = (dtw_limit(x, y+1) == ins)?get_dis(in2+mfcc_num, in1):dis_err;
|
||||
d_right_up = (dtw_limit(x+1, y+2) == ins)?get_dis(in2+mfcc_num+mfcc_num, in1+mfcc_num):dis_err;
|
||||
right = (dtw_limit(x+1, y) == ins)?get_dis(in2, in1+mfcc_num):dis_err;
|
||||
right_up = (dtw_limit(x+1, y+1) == ins)?get_dis(in2+mfcc_num, in1+mfcc_num):dis_err;
|
||||
|
||||
min = right_up;
|
||||
if (min > right)
|
||||
min = right;
|
||||
|
||||
if (min > d_right_up)
|
||||
min = d_right_up;
|
||||
|
||||
dis += min;
|
||||
|
||||
if (min == right_up) {
|
||||
in1 += mfcc_num;
|
||||
x++;
|
||||
in2 += mfcc_num;
|
||||
y++;
|
||||
} else if (min == d_right_up) {
|
||||
//in2 += mfcc_num;
|
||||
//y++;
|
||||
in2 = in2 + mfcc_num + mfcc_num;
|
||||
y += 2;
|
||||
in1 += mfcc_num;
|
||||
x++;
|
||||
} else {
|
||||
in1 += mfcc_num;
|
||||
x++;
|
||||
}
|
||||
step++;
|
||||
|
||||
mdl += mfcc_num;
|
||||
get_mean(in1, in2, mdl);
|
||||
|
||||
// printf("x=%d y=%d\r\n", x, y);
|
||||
} while ((x < in_frm_num) && (y < mdl_frm_num));
|
||||
printf("step=%d\r\n", step);
|
||||
ftr_mdl->frm_num = step;
|
||||
}
|
||||
return (dis/step); //步长归一化
|
||||
}
|
||||
|
||||
20
libraries/Maix_Speech_Recognition/src/util/DTW.h
Normal file
20
libraries/Maix_Speech_Recognition/src/util/DTW.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#ifndef _DTW_H
|
||||
#define _DTW_H
|
||||
#include "g_def.h"
|
||||
|
||||
#define dis_err 0xFFFFFFFF
|
||||
#define dis_max 0xFFFFFFFF
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
u32 dtw(v_ftr_tag *ftr_in, v_ftr_tag *frt_mdl);
|
||||
u32 get_mdl(v_ftr_tag *ftr_in1, v_ftr_tag *ftr_in2, v_ftr_tag *ftr_mdl);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
37
libraries/Maix_Speech_Recognition/src/util/FIR.c
Normal file
37
libraries/Maix_Speech_Recognition/src/util/FIR.c
Normal file
@@ -0,0 +1,37 @@
|
||||
#include <stdio.h>
|
||||
#include "FIR.h"
|
||||
|
||||
const double Gains[39] = {
|
||||
-0.01152483856464, -0.0225638929287, -0.002300804134165, 0.01259363093284,
|
||||
-0.0003113695305127, -0.01591961043518, 0.005297252625722, 0.01846679452201,
|
||||
-0.01074864266549, -0.02187510941784, 0.0192688121099, 0.02485021814652,
|
||||
-0.03198920030056, -0.02754770718169, 0.05351978821001, 0.0296026126381,
|
||||
-0.09977798297944, -0.03089664322888, 0.3161729116138, 0.5313543998522,
|
||||
0.3161729116138, -0.03089664322888, -0.09977798297944, 0.0296026126381,
|
||||
0.05351978821001, -0.02754770718169, -0.03198920030056, 0.02485021814652,
|
||||
0.0192688121099, -0.02187510941784, -0.01074864266549, 0.01846679452201,
|
||||
0.005297252625722, -0.01591961043518, -0.0003113695305127, 0.01259363093284,
|
||||
-0.002300804134165, -0.0225638929287, -0.01152483856464
|
||||
};
|
||||
#define ORDER 38 //阶数
|
||||
|
||||
double Buffer[ORDER + 1]; //采样历史数据
|
||||
|
||||
double Fir(double Input)
|
||||
{
|
||||
double Output = 0.0; //数据输出
|
||||
int Index; //下标索引
|
||||
|
||||
//采样数据移位
|
||||
for (Index = ORDER; Index > 0; Index--)
|
||||
Buffer[Index] = Buffer[Index-1];
|
||||
|
||||
Buffer[0] = Input;
|
||||
|
||||
//计算输出
|
||||
for (Index = 0; Index < ORDER+1; Index++)
|
||||
Output += Gains[Index]*Buffer[Index];
|
||||
|
||||
return Output;
|
||||
}
|
||||
|
||||
15
libraries/Maix_Speech_Recognition/src/util/FIR.h
Normal file
15
libraries/Maix_Speech_Recognition/src/util/FIR.h
Normal file
@@ -0,0 +1,15 @@
|
||||
#ifndef _FIR_H_
|
||||
#define _FIR_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
double Fir(double Input);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
273
libraries/Maix_Speech_Recognition/src/util/MFCC.c
Normal file
273
libraries/Maix_Speech_Recognition/src/util/MFCC.c
Normal file
@@ -0,0 +1,273 @@
|
||||
/******* MFCC.C *******/
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "g_def.h"
|
||||
#include <math.h>
|
||||
#include "ADC.h"
|
||||
#include "VAD.h"
|
||||
#include "MFCC.h"
|
||||
#include "MFCC_Arg.h"
|
||||
#include <float.h>
|
||||
#include "sysctl.h"
|
||||
#include "dmac.h"
|
||||
#include "fft.h"
|
||||
#include "FIR.h"
|
||||
|
||||
void cr4_fft_1024_stm32(void *pssOUT, void *pssIN, u16 Nbin);
|
||||
void normalize(s16 *mfcc_p, u16 frm_num);
|
||||
|
||||
u32 fft_out[mfcc_fft_point];
|
||||
s16 fft_in[mfcc_fft_point];
|
||||
|
||||
extern uint64_t fft_out_data[512 / 2];
|
||||
extern void fft_dma_init(void);
|
||||
extern volatile fft_t *const fft;
|
||||
|
||||
void fft_input_intdata(int16_t *data, uint8_t point)
|
||||
{
|
||||
uint16_t point_num = 0;
|
||||
uint16_t i;
|
||||
fft_data_t input_data;
|
||||
|
||||
if (point == 0)
|
||||
point_num = 512;
|
||||
else if (point == 1)
|
||||
point_num = 256;
|
||||
else if (point == 2)
|
||||
point_num = 128;
|
||||
else if (point == 3)
|
||||
point_num = 64;
|
||||
point_num = point_num / 2; // one time send two data
|
||||
|
||||
for (i = 0; i < point_num; i++) {
|
||||
input_data.R1 = data[2 * i];
|
||||
input_data.I1 = 0;
|
||||
input_data.R2 = data[2 * i + 1];
|
||||
input_data.I2 = 0;
|
||||
|
||||
fft->fft_input_fifo.fft_input_fifo = *(uint64_t *)&input_data;
|
||||
}
|
||||
}
|
||||
|
||||
void fft_sync_data(int16_t *data, uint8_t point, fft_data_t *fft_data)
|
||||
{
|
||||
uint16_t point_num = 0;
|
||||
uint16_t i;
|
||||
|
||||
if (point == 0)
|
||||
point_num = 512;
|
||||
else if (point == 1)
|
||||
point_num = 256;
|
||||
else if (point == 2)
|
||||
point_num = 128;
|
||||
else if (point == 3)
|
||||
point_num = 64;
|
||||
point_num = point_num / 2; // one time send two data
|
||||
|
||||
for (i = 0; i < point_num; i++) {
|
||||
(fft_data + i)->R1 = data[2 * i];
|
||||
(fft_data + i)->I1 = 0;
|
||||
(fft_data + i)->R2 = data[2 * i + 1];
|
||||
(fft_data + i)->I2 = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* cr4_fft_1024_stm32输入参数是有符号数
|
||||
*
|
||||
* cr4_fft_1024_stm32输入参数包括实数和虚数
|
||||
但语音数据只包括实数部分 虚数用0填充
|
||||
fft点数超出输入数据长度时 超过部分用0填充
|
||||
|
||||
cr4_fft_1024_stm32输出数据包括实数和虚数
|
||||
应该取其绝对值 即平方和的根
|
||||
*/
|
||||
|
||||
u32 *mfcc_fft(s16 *dat_buf, u16 buf_len)
|
||||
{
|
||||
u16 i;
|
||||
s32 real, imag;
|
||||
fft_data_t output_data;
|
||||
|
||||
if (buf_len > mfcc_fft_point)
|
||||
return (void *)0;
|
||||
|
||||
for (i = 0; i < buf_len; i++) {
|
||||
fft_in[i] = *(dat_buf+i);//虚部高位 实部低位
|
||||
}
|
||||
for (; i < mfcc_fft_point; i++)
|
||||
fft_in[i] = 0;//超出部分用0填充
|
||||
|
||||
fft_data_t fft_in_buf[512];
|
||||
memset(fft_in_buf, 0 , sizeof(fft_in_buf));
|
||||
fft_sync_data(fft_in, FFT_512, fft_in_buf);
|
||||
fft_complex_uint16_dma(DMAC_CHANNEL0, DMAC_CHANNEL1, 0x1ff, FFT_DIR_FORWARD, (uint64_t *)fft_in_buf, 512, fft_out_data);
|
||||
|
||||
for (i = 0; i < frq_max / 2; i++) {
|
||||
output_data = *(fft_data_t *)&fft_out_data[i];
|
||||
imag = (s16)output_data.I1;
|
||||
real = (s16)output_data.R1;
|
||||
real = real*real+imag*imag;
|
||||
fft_out[2 * i] = sqrtf((float)real)*10;
|
||||
|
||||
imag = (s16)output_data.I2;
|
||||
real = (s16)output_data.R2;
|
||||
|
||||
real = real*real+imag*imag;
|
||||
fft_out[2 * i + 1] = sqrtf((float)real)*10;
|
||||
}
|
||||
return fft_out;
|
||||
}
|
||||
|
||||
/* MFCC:Mel频率倒谱系数
|
||||
*
|
||||
参数:
|
||||
valid 有效语音段起点终点
|
||||
|
||||
返回值:
|
||||
v_ftr MFCC值,帧数
|
||||
|
||||
Mel=2595*lg(1+f/700)
|
||||
1000Hz以下按线性刻度 1000Hz以上按对数刻度
|
||||
三角型滤波器中心频率 在Mel频率刻度上等间距排列
|
||||
预加重:6dB/倍频程 一阶高通滤波器 H(z)=1-uz^(-1) y(n)=x(n)-ux(n-1) u=0.94~0.97
|
||||
|
||||
MFCC 步骤:
|
||||
1.对语音信号预加重、分帧、加汉明窗处理,然后进行短时傅里叶变换,得出频谱
|
||||
2.取频谱平方,得能量谱。并用24个Mel带通滤波器进行滤波,输出Mel功率谱
|
||||
3.对每个滤波器的输出值取对数,得到相应频带的对数功率谱。然后对24个对数功率进行
|
||||
反离散余弦变换得到12个MFCC系数
|
||||
*/
|
||||
|
||||
void get_mfcc(valid_tag *valid, v_ftr_tag *v_ftr, atap_tag *atap_arg)
|
||||
{
|
||||
u16 *vc_dat;
|
||||
u16 h, i;
|
||||
u32 *frq_spct; //频谱
|
||||
s16 vc_temp[FRAME_LEN]; //语音暂存区
|
||||
s32 temp;
|
||||
|
||||
u32 pow_spct[tri_num]; //三角滤波器输出对数功率谱
|
||||
u16 frm_con;
|
||||
s16 *mfcc_p;
|
||||
s8 *dct_p;
|
||||
s32 mid;
|
||||
u16 v_frm_num;
|
||||
|
||||
//USART1_printf("start=%d end=%d",(u32)(valid->start),(u32)(valid->end));
|
||||
v_frm_num = (((u32)(valid->end)-(u32)(valid->start))/2-FRAME_LEN)/(FRAME_LEN-frame_mov)+1;
|
||||
if (v_frm_num > vv_frm_max) {
|
||||
printf("frm_num=%d ", v_frm_num);
|
||||
v_ftr->frm_num = 0;
|
||||
} else {
|
||||
mid = (s32)atap_arg->mid_val;
|
||||
mfcc_p = v_ftr->mfcc_dat;
|
||||
frm_con = 0;
|
||||
//low pass filter
|
||||
// for (vc_dat = (u16 *)(valid->start); vc_dat <= ((u16 *)(valid->end-FRAME_LEN)); vc_dat += 1) {
|
||||
// *vc_dat = (u16)(Fir(*vc_dat));
|
||||
|
||||
// }
|
||||
for (vc_dat = (u16 *)(valid->start); vc_dat <= ((u16 *)(valid->end-FRAME_LEN)); vc_dat += (FRAME_LEN-frame_mov)) {
|
||||
for (i = 0; i < FRAME_LEN; i++) {
|
||||
//预加重
|
||||
// printf("vc_dat[%d]=%d ",i,((s32)(*(vc_dat+i))-mid));
|
||||
temp = ((s32)(*(vc_dat+i))-mid) - hp_ratio(((s32)(*(vc_dat+i-1))-mid));
|
||||
// printf("vc_hp[%d]=%d ",i,temp);
|
||||
//加汉明窗 并放大10倍
|
||||
vc_temp[i] = (s16)(temp*hamm[i]/(hamm_top/10));
|
||||
// printf("vc_hm[%d]=%d\r\n",i,vc_temp[i]);
|
||||
}
|
||||
|
||||
frq_spct = mfcc_fft(vc_temp, FRAME_LEN);
|
||||
|
||||
for (i = 0; i < frq_max; i++) {
|
||||
//printf("frq_spct[%d]=%d ",i,frq_spct[i]);
|
||||
frq_spct[i] *= frq_spct[i];//能量谱
|
||||
//printf("E_spct[%d]=%d\r\n",i,frq_spct[i]);
|
||||
}
|
||||
|
||||
//加三角滤波器
|
||||
pow_spct[0] = 0;
|
||||
for (i = 0; i < tri_cen[1]; i++)
|
||||
pow_spct[0] += (frq_spct[i]*tri_even[i]/(tri_top/10));
|
||||
for (h = 2; h < tri_num; h += 2) {
|
||||
pow_spct[h] = 0;
|
||||
for (i = tri_cen[h-1]; i < tri_cen[h+1]; i++)
|
||||
pow_spct[h] += (frq_spct[i]*tri_even[i]/(tri_top/10));
|
||||
}
|
||||
|
||||
for (h = 1; h < (tri_num-2); h += 2) {
|
||||
pow_spct[h] = 0;
|
||||
for (i = tri_cen[h-1]; i < tri_cen[h+1]; i++)
|
||||
pow_spct[h] += (frq_spct[i]*tri_odd[i]/(tri_top/10));
|
||||
}
|
||||
pow_spct[tri_num-1] = 0;
|
||||
for (i = tri_cen[tri_num-2]; i < (mfcc_fft_point/2); i++)
|
||||
pow_spct[tri_num-1] += (frq_spct[i]*tri_odd[i]/(tri_top/10));
|
||||
|
||||
//三角滤波器输出取对数
|
||||
for (h = 0; h < tri_num; h++) {
|
||||
//USART1_printf("pow_spct[%d]=%d ",h,pow_spct[h]);
|
||||
pow_spct[h] = (u32)(log(pow_spct[h])*100);//取对数后 乘100 提升数据有效位数
|
||||
//USART1_printf("%d\r\n",pow_spct[h]);
|
||||
}
|
||||
|
||||
//反离散余弦变换
|
||||
dct_p = (s8 *)dct_arg;
|
||||
for (h = 0; h < mfcc_num; h++) {
|
||||
mfcc_p[h] = 0;
|
||||
for (i = 0; i < tri_num; i++)
|
||||
mfcc_p[h] += (((s32)pow_spct[i])*((s32)dct_p[i])/100);
|
||||
//printf("%d,",mfcc_p[h]);
|
||||
dct_p += tri_num;
|
||||
}
|
||||
//USART1_printf("\r\n");
|
||||
mfcc_p += mfcc_num;
|
||||
frm_con++;
|
||||
}
|
||||
mfcc_p = v_ftr->mfcc_dat;
|
||||
normalize(mfcc_p, frm_con);
|
||||
v_ftr->frm_num = frm_con;
|
||||
}
|
||||
}
|
||||
|
||||
s16 avg(s16 *mfcc_p, u16 frm_num)
|
||||
{
|
||||
int i, j;
|
||||
double sum = 0.0f;
|
||||
|
||||
printf("frm_num = %d, mfcc_num = %d\n", frm_num, mfcc_num);
|
||||
for (i = 0; i < frm_num; i++)
|
||||
for (j = 0; j < mfcc_num; j++) {
|
||||
sum += mfcc_p[i * mfcc_num + j];
|
||||
// printf("[%d, %d]%f %f ", i, j, sum, mfcc_p[i * mfcc_num + j]);
|
||||
}
|
||||
return (s16)(sum / (frm_num * mfcc_num));
|
||||
}
|
||||
|
||||
s16 stdev(s16 *mfcc_p, s16 avg1, u16 frm_num)
|
||||
{
|
||||
int i, j;
|
||||
double sum = 0.0f;
|
||||
|
||||
for (i = 0; i < frm_num; i++)
|
||||
for (j = 0; j < mfcc_num; j++)
|
||||
sum += (mfcc_p[i * mfcc_num + j] - avg1) * (mfcc_p[i * mfcc_num + j] - avg1);
|
||||
return (s16)(sqrt(sum / (frm_num * mfcc_num)));
|
||||
}
|
||||
|
||||
void normalize(s16 *mfcc_p, u16 frm_num)
|
||||
{
|
||||
int i, j;
|
||||
s16 avg1 = avg(mfcc_p, frm_num);
|
||||
s16 stdev1 = stdev(mfcc_p, avg1, frm_num);
|
||||
|
||||
printf("avg1 = %d, stdev1 = %d\n", avg1, stdev1);
|
||||
for (i = 0; i < frm_num; i++)
|
||||
for (j = 0; j < mfcc_num; j++)
|
||||
mfcc_p[i * mfcc_num + j] = (mfcc_p[i * mfcc_num + j] - avg1) * 100 / stdev1;
|
||||
}
|
||||
39
libraries/Maix_Speech_Recognition/src/util/MFCC.h
Normal file
39
libraries/Maix_Speech_Recognition/src/util/MFCC.h
Normal file
@@ -0,0 +1,39 @@
|
||||
#ifndef _MFCC_H
|
||||
#define _MFCC_H
|
||||
#include "VAD.h"
|
||||
#include "g_def.h"
|
||||
|
||||
#define hp_ratio(x) (x*95/100)//预加重系数 0.95
|
||||
#define mfcc_fft_point 512 //FFT点数
|
||||
#define frq_max (mfcc_fft_point/2) //最大频率
|
||||
#define hamm_top 10000 //汉明窗最大值
|
||||
#define tri_top 1000 //三角滤波器顶点值
|
||||
#define tri_num 24 //三角滤波器个数
|
||||
//#define tri_num 17 //三角滤波器个数
|
||||
#define mfcc_num 12 //MFCC阶数
|
||||
|
||||
#define vv_tim_max 2200 //单段有效语音最长时间 ms
|
||||
#define vv_frm_max ((vv_tim_max-frame_time)/(frame_time-frame_mov_t)+1) //单段有效语音最长帧数
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
//#pragma pack(4)
|
||||
typedef struct {
|
||||
u16 save_sign; //存储标记 用于判断flash中特征模板是否有效
|
||||
u16 frm_num; //帧数
|
||||
// u8 word_num;
|
||||
s16 mfcc_dat[vv_frm_max*mfcc_num]; //MFCC转换结果
|
||||
// float mfcc_dat[vv_frm_max*mfcc_num];
|
||||
} v_ftr_tag; //语音特征结构体
|
||||
//#pragma pack()
|
||||
|
||||
void get_mfcc(valid_tag *valid, v_ftr_tag *v_ftr, atap_tag *atap_arg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
135
libraries/Maix_Speech_Recognition/src/util/MFCC_Arg.h
Normal file
135
libraries/Maix_Speech_Recognition/src/util/MFCC_Arg.h
Normal file
@@ -0,0 +1,135 @@
|
||||
#ifndef MFCC_Arg
|
||||
#define MFCC_Arg
|
||||
#include "g_def.h"
|
||||
|
||||
//汉明窗 窗长160
|
||||
const u16 hamm[] = {
|
||||
|
||||
800, 804, 814, 832, 857, 889, 929, 975, 1028, 1088, 1155, 1228, 1308, 1394, 1486, 1585, 1689, 1800, 1915, 2037,
|
||||
2163, 2295, 2432, 2573, 2718, 2868, 3022, 3179, 3340, 3504, 3671, 3841, 4013, 4187, 4364, 4542, 4721, 4901, 5082,
|
||||
5264, 5445, 5627, 5808, 5989, 6169, 6348, 6525, 6700, 6873, 7044, 7213, 7378, 7541, 7700, 7856, 8007, 8155, 8298,
|
||||
8437, 8571, 8701, 8825, 8943, 9056, 9164, 9265, 9361, 9450, 9533, 9610, 9680, 9743, 9799, 9849, 9892, 9927, 9956,
|
||||
9978, 9992, 9999, 9999, 9992, 9978, 9956, 9927, 9892, 9849, 9799, 9743, 9680, 9610, 9533, 9450, 9361, 9265, 9164,
|
||||
9056, 8943, 8825, 8701, 8571, 8437, 8298, 8155, 8007, 7856, 7700, 7541, 7378, 7213, 7044, 6873, 6700, 6525, 6348,
|
||||
6169, 5989, 5808, 5627, 5445, 5264, 5082, 4901, 4721, 4542, 4364, 4187, 4013, 3841, 3671, 3504, 3340, 3179, 3022,
|
||||
2868, 2718, 2573, 2432, 2295, 2163, 2037, 1915, 1800, 1689, 1585, 1486, 1394, 1308, 1228, 1155, 1088, 1028, 975,
|
||||
929, 889, 857, 832, 814, 804, 800
|
||||
};
|
||||
|
||||
#if 0 //1024 point fft
|
||||
//三角滤波器中心频率点
|
||||
const u16 tri_cen[] = {
|
||||
|
||||
11, 22, 33, 44, 55, 66, 77, 88, 99, 110, 121, 134, 152, 171, 191, 214, 237, 263, 291, 321, 354, 389, 427, 468
|
||||
};
|
||||
|
||||
//奇数三角滤波器首尾相连的折线
|
||||
const u16 tri_odd[] = {
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 91, 182, 273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545,
|
||||
455, 364, 273, 182, 91, 0, 91, 182, 273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455,
|
||||
364, 273, 182, 91, 0, 91, 182, 273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364,
|
||||
273, 182, 91, 0, 91, 182, 273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364, 273,
|
||||
182, 91, 0, 91, 182, 273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364, 273, 182,
|
||||
91, 0, 77, 154, 231, 308, 385, 462, 538, 615, 692, 769, 846, 923, 1000, 944, 889, 833, 778, 722, 667, 611, 556,
|
||||
500, 444, 389, 333, 278, 222, 167, 111, 56, 0, 53, 105, 158, 211, 263, 316, 368, 421, 474, 526, 579, 632, 684,
|
||||
737, 789, 842, 895, 947, 1000, 950, 900, 850, 800, 750, 700, 650, 600, 550, 500, 450, 400, 350, 300, 250, 200,
|
||||
150, 100, 50, 0, 43, 87, 130, 174, 217, 261, 304, 348, 391, 435, 478, 522, 565, 609, 652, 696, 739, 783, 826,
|
||||
870, 913, 957, 1000, 957, 913, 870, 826, 783, 739, 696, 652, 609, 565, 522, 478, 435, 391, 348, 304, 261, 217,
|
||||
174, 130, 87, 43, 0, 38, 77, 115, 154, 192, 231, 269, 308, 346, 385, 423, 462, 500, 538, 577, 615, 654, 692,
|
||||
731, 769, 808, 846, 885, 923, 962, 1000, 964, 929, 893, 857, 821, 786, 750, 714, 679, 643, 607, 571, 536, 500,
|
||||
464, 429, 393, 357, 321, 286, 250, 214, 179, 143, 107, 71, 36, 0, 33, 67, 100, 133, 167, 200, 233, 267, 300,
|
||||
333, 367, 400, 433, 467, 500, 533, 567, 600, 633, 667, 700, 733, 767, 800, 833, 867, 900, 933, 967, 1000, 970,
|
||||
939, 909, 879, 848, 818, 788, 758, 727, 697, 667, 636, 606, 576, 545, 515, 485, 455, 424, 394, 364, 333, 303,
|
||||
273, 242, 212, 182, 152, 121, 91, 61, 30, 0, 29, 57, 86, 114, 143, 171, 200, 229, 257, 286, 314, 343, 371, 400,
|
||||
429, 457, 486, 514, 543, 571, 600, 629, 657, 686, 714, 743, 771, 800, 829, 857, 886, 914, 943, 971, 1000, 974,
|
||||
947, 921, 895, 868, 842, 816, 789, 763, 737, 711, 684, 658, 632, 605, 579, 553, 526, 500, 474, 447, 421, 395,
|
||||
368, 342, 316, 289, 263, 237, 211, 184, 158, 132, 105, 79, 53, 26, 0, 24, 49, 73, 98, 122, 146, 171, 195, 220,
|
||||
244, 268, 293, 317, 341, 366, 390, 415, 439, 463, 488, 512, 537, 561, 585, 610, 634, 659, 683, 707, 732, 756,
|
||||
780, 805, 829, 854, 878, 902, 927, 951, 976, 1000, 977, 955, 932, 909, 886, 864, 841, 818, 795, 773, 750, 727,
|
||||
705, 682, 659, 636, 614, 591, 568, 545, 523, 500, 477, 455, 432, 409, 386, 364, 341, 318, 295, 273, 250, 227,
|
||||
205, 182, 159, 136, 114, 91, 68, 45, 23, 0
|
||||
};
|
||||
|
||||
//偶数三角滤波器首尾相连的折线
|
||||
const u16 tri_even[] = {
|
||||
|
||||
91, 182, 273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364, 273, 182, 91, 0, 91,
|
||||
182, 273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364, 273, 182, 91, 0, 91, 182,
|
||||
273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364, 273, 182, 91, 0, 91, 182, 273,
|
||||
364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364, 273, 182, 91, 0, 91, 182, 273, 364,
|
||||
455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364, 273, 182, 91, 0, 91, 182, 273, 364, 455,
|
||||
545, 636, 727, 818, 909, 1000, 923, 846, 769, 692, 615, 538, 462, 385, 308, 231, 154, 77, 0, 56, 111, 167, 222,
|
||||
278, 333, 389, 444, 500, 556, 611, 667, 722, 778, 833, 889, 944, 1000, 947, 895, 842, 789, 737, 684, 632, 579,
|
||||
526, 474, 421, 368, 316, 263, 211, 158, 105, 53, 0, 50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600,
|
||||
650, 700, 750, 800, 850, 900, 950, 1000, 957, 913, 870, 826, 783, 739, 696, 652, 609, 565, 522, 478, 435, 391,
|
||||
348, 304, 261, 217, 174, 130, 87, 43, 0, 43, 87, 130, 174, 217, 261, 304, 348, 391, 435, 478, 522, 565, 609, 652,
|
||||
696, 739, 783, 826, 870, 913, 957, 1000, 962, 923, 885, 846, 808, 769, 731, 692, 654, 615, 577, 538, 500, 462,
|
||||
423, 385, 346, 308, 269, 231, 192, 154, 115, 77, 38, 0, 36, 71, 107, 143, 179, 214, 250, 286, 321, 357, 393, 429,
|
||||
464, 500, 536, 571, 607, 643, 679, 714, 750, 786, 821, 857, 893, 929, 964, 1000, 967, 933, 900, 867, 833, 800, 767,
|
||||
733, 700, 667, 633, 600, 567, 533, 500, 467, 433, 400, 367, 333, 300, 267, 233, 200, 167, 133, 100, 67, 33, 0, 30,
|
||||
61, 91, 121, 152, 182, 212, 242, 273, 303, 333, 364, 394, 424, 455, 485, 515, 545, 576, 606, 636, 667, 697, 727,
|
||||
758, 788, 818, 848, 879, 909, 939, 970, 1000, 971, 943, 914, 886, 857, 829, 800, 771, 743, 714, 686, 657, 629, 600,
|
||||
571, 543, 514, 486, 457, 429, 400, 371, 343, 314, 286, 257, 229, 200, 171, 143, 114, 86, 57, 29, 0, 26, 53, 79,
|
||||
105, 132, 158, 184, 211, 237, 263, 289, 316, 342, 368, 395, 421, 447, 474, 500, 526, 553, 579, 605, 632, 658, 684,
|
||||
711, 737, 763, 789, 816, 842, 868, 895, 921, 947, 974, 1000, 976, 951, 927, 902, 878, 854, 829, 805, 780, 756, 732,
|
||||
707, 683, 659, 634, 610, 585, 561, 537, 512, 488, 463, 439, 415, 390, 366, 341, 317, 293, 268, 244, 220, 195, 171,
|
||||
146, 122, 98, 73, 49, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
#endif
|
||||
|
||||
#if 1 //512 point fft
|
||||
//三角滤波器中心频率点
|
||||
const u16 tri_cen[] = {
|
||||
4, 7, 12, 16, 21, 26, 32, 38, 44, 51, 59, 67, 76, 85, 96, 107, 119, 132, 146, 161, 177, 195, 213, 234
|
||||
};
|
||||
|
||||
//奇数三角滤波器首尾相连的折线
|
||||
const u16 tri_odd[] = {
|
||||
|
||||
0, 0, 0, 0, 333, 667, 1000, 800, 600, 400, 200, 0, 250, 500, 750, 1000, 800, 600, 400, 200, 0, 200, 400, 600, 800, 1000, 833, 667, 500, 333,
|
||||
167, 0, 167, 333, 500, 667, 833, 1000, 833, 667, 500, 333, 167, 0, 143, 286, 429, 571, 714, 857, 1000, 875, 750, 625, 500, 375, 250, 125, 0,
|
||||
125, 250, 375, 500, 625, 750, 875, 1000, 889, 778, 667, 556, 444, 333, 222, 111, 0, 111, 222, 333, 444, 556, 667, 778, 889, 1000, 909, 818,
|
||||
727, 636, 545, 455, 364, 273, 182, 91, 0, 91, 182, 273, 364, 455, 545, 636, 727, 818, 909, 1000, 917, 833, 750, 667, 583, 500, 417, 333, 250,
|
||||
167, 83, 0, 77, 154, 231, 308, 385, 462, 538, 615, 692, 769, 846, 923, 1000, 929, 857, 786, 714, 643, 571, 500, 429, 357, 286, 214, 143, 71, 0,
|
||||
67, 133, 200, 267, 333, 400, 467, 533, 600, 667, 733, 800, 867, 933, 1000, 938, 875, 813, 750, 688, 625, 563, 500, 438, 375, 313, 250, 188,
|
||||
125, 63, 0, 56, 111, 167, 222, 278, 333, 389, 444, 500, 556, 611, 667, 722, 778, 833, 889, 944, 1000, 944, 889, 833, 778, 722, 667, 611, 556,
|
||||
500, 444, 389, 333, 278, 222, 167, 111, 56, 0, 48, 95, 143, 190, 238, 286, 333, 381, 429, 476, 524, 571, 619, 667, 714, 762, 810, 857, 905, 952,
|
||||
1000, 955, 909, 864, 818, 773, 727, 682, 636, 591, 545, 500, 455, 409, 364, 318, 273, 227, 182, 136, 91, 45, 0
|
||||
};
|
||||
|
||||
//偶数三角滤波器首尾相连的折线
|
||||
const u16 tri_even[] = {
|
||||
|
||||
250, 500, 750, 1000, 667, 334, 0, 200, 400, 600, 800, 1000, 750, 500, 250, 0, 200, 400, 600, 800, 1000, 800, 600, 400, 200, 0, 167, 333, 500,
|
||||
667, 833, 1000, 833, 667, 500, 333, 167, 0, 167, 333, 500, 667, 833, 1000, 857, 714, 571, 429, 286, 143, 0, 125, 250, 375, 500, 625, 750, 875,
|
||||
1000, 875, 750, 625, 500, 375, 250, 125, 0, 111, 222, 333, 444, 556, 667, 778, 889, 1000, 889, 778, 667, 556, 444, 333, 222, 111, 0, 91, 182,
|
||||
273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364, 273, 182, 91, 0, 83, 167, 250, 333, 417, 500, 583, 667, 750,
|
||||
833, 917, 1000, 923, 846, 769, 692, 615, 538, 462, 385, 308, 231, 154, 77, 0, 71, 143, 214, 286, 357, 429, 500, 571, 643, 714, 786, 857, 929,
|
||||
1000, 933, 867, 800, 733, 667, 600, 533, 467, 400, 333, 267, 200, 133, 67, 0, 63, 125, 188, 250, 313, 375, 438, 500, 563, 625, 688, 750, 813,
|
||||
875, 938, 1000, 944, 889, 833, 778, 722, 667, 611, 556, 500, 444, 389, 333, 278, 222, 167, 111, 56, 0, 56, 111, 167, 222, 278, 333, 389, 444,
|
||||
500, 556, 611, 667, 722, 778, 833, 889, 944, 1000, 952, 905, 857, 810, 762, 714, 667, 619, 571, 524, 476, 429, 381, 333, 286, 238, 190, 143,
|
||||
95, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
#endif
|
||||
|
||||
// 反离散余弦变换 余弦表 24*12
|
||||
const s8 dct_arg[] = {
|
||||
|
||||
100, 98, 95, 90, 83, 75, 66, 56, 44, 32, 20, 7, -7, -20, -32, -44, -56, -66, -75, -83, -90, -95, -98, -100,
|
||||
99, 92, 79, 61, 38, 13, -13, -38, -61, -79, -92, -99, -99, -92, -79, -61, -38, -13, 13, 38, 61, 79, 92, 99,
|
||||
98, 83, 56, 20, -20, -56, -83, -98, -98, -83, -56, -20, 20, 56, 83, 98, 98, 83, 56, 20, -20, -56, -83, -98,
|
||||
97, 71, 26, -26, -71, -97, -97, -71, -26, 26, 71, 97, 97, 71, 26, -26, -71, -97, -97, -71, -26, 26, 71, 97,
|
||||
95, 56, -7, -66, -98, -90, -44, 20, 75, 100, 83, 32, -32, -83, -100, -75, -20, 44, 90, 98, 66, 7, -56, -95,
|
||||
92, 38, -38, -92, -92, -38, 38, 92, 92, 38, -38, -92, -92, -38, 38, 92, 92, 38, -38, -92, -92, -38, 38, 92,
|
||||
90, 20, -66, -100, -56, 32, 95, 83, 7, -75, -98, -44, 44, 98, 75, -7, -83, -95, -32, 56, 100, 66, -20, -90,
|
||||
87, 0, -87, -87, 0, 87, 87, 0, -87, -87, 0, 87, 87, 0, -87, -87, 0, 87, 87, 0, -87, -87, 0, 87,
|
||||
83, -20, -98, -56, 56, 98, 20, -83, -83, 20, 98, 56, -56, -98, -20, 83, 83, -20, -98, -56, 56, 98, 20, -83,
|
||||
79, -38, -99, -13, 92, 61, -61, -92, 13, 99, 38, -79, -79, 38, 99, 13, -92, -61, 61, 92, -13, -99, -38, 79,
|
||||
75, -56, -90, 32, 98, -7, -100, -20, 95, 44, -83, -66, 66, 83, -44, -95, 20, 100, 7, -98, -32, 90, 56, -75,
|
||||
71, -71, -71, 71, 71, -71, -71, 71, 71, -71, -71, 71, 71, -71, -71, 71, 71, -71, -71, 71, 71, -71, -71, 71,
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
333
libraries/Maix_Speech_Recognition/src/util/VAD.c
Normal file
333
libraries/Maix_Speech_Recognition/src/util/VAD.c
Normal file
@@ -0,0 +1,333 @@
|
||||
/******** VAD.C *******/
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "g_def.h"
|
||||
#include "ADC.h"
|
||||
#include "VAD.h"
|
||||
|
||||
#define n_thl_ratio 1 //噪声门限系数 n_thl=n_max_mean*n_thl_ratio
|
||||
#define s_thl_ratio(x) (x*30/10) //短时幅度判决门限系数 s_thl=sum_mean*s_thl_ratio
|
||||
#define z_thl_ratio(x) (x*2/160) //短时过零率 判决门限系数 常数
|
||||
|
||||
|
||||
#define atap_frm_t 30 //背景噪音自适应时间帧长度 ms
|
||||
#define atap_frm_len ((fs/1000)*atap_frm_t) //背景噪音自适应帧长度
|
||||
|
||||
u16 vad_data[VcBuf_Len];
|
||||
u32 frm_n;
|
||||
|
||||
/* 求取自适应参数
|
||||
* noise :噪声起始点
|
||||
n_len :噪声长度
|
||||
atap ;自适应参数
|
||||
*/
|
||||
void noise_atap(const u16 *noise, u16 n_len, atap_tag *atap)
|
||||
{
|
||||
u32 h, i;
|
||||
u32 n_max;
|
||||
u32 max_sum;//每一帧噪声最大值 累加取平均 求噪声阈值
|
||||
u32 n_sum; //所有数值之和 求平均值 确定零(中)值
|
||||
u32 mid; //中值
|
||||
u32 abs; //绝对值
|
||||
u32 abs_sum;//绝对值和
|
||||
u32 frm_num;
|
||||
|
||||
if ((n_len%atap_frm_len) != 0) //参数检查
|
||||
return;
|
||||
frm_num = n_len/atap_frm_len;
|
||||
|
||||
n_sum = 0;
|
||||
max_sum = 0;
|
||||
for (i = 0; i < n_len; i++)
|
||||
n_sum += *(noise+i);
|
||||
mid = n_sum/i;
|
||||
|
||||
abs_sum = 0;
|
||||
for (i = 0; i < n_len; i += atap_frm_len) {
|
||||
n_max = 0;
|
||||
for (h = 0; h < atap_frm_len; h++) {
|
||||
abs = (*(noise+i+h) > mid)?(*(noise+i+h)-mid):(mid-*(noise+i+h));
|
||||
if (abs > n_max) ////取每帧最大绝对值
|
||||
n_max = abs;
|
||||
abs_sum += abs;
|
||||
}
|
||||
max_sum += n_max;
|
||||
//USART1_printf("n_max=%d ", n_max);
|
||||
//USART1_printf("max_sum=%d\r\n", max_sum);
|
||||
}
|
||||
|
||||
abs_sum /= (n_len/FRAME_LEN);
|
||||
max_sum /= frm_num;
|
||||
atap->mid_val = mid;
|
||||
atap->n_thl = max_sum*n_thl_ratio;
|
||||
atap->s_thl = s_thl_ratio(abs_sum);
|
||||
atap->z_thl = z_thl_ratio(FRAME_LEN)/n_thl_ratio;
|
||||
printf("VAD sum=%d ", atap->s_thl);
|
||||
printf("VAD zero=%d\n", atap->z_thl);
|
||||
}
|
||||
#define v_durmin_t 200 //有效语音最短时间门限 ms
|
||||
#define v_durmin_f v_durmin_t/(frame_time-frame_mov_t) //有效语音最短帧数
|
||||
#define s_durmax_t 210 //无声段最长时间门限 ms
|
||||
#define s_durmax_f s_durmax_t/(frame_time-frame_mov_t)//无声段最长帧数
|
||||
|
||||
/*******
|
||||
* VAD (Voice activity detection) 语音活动检测
|
||||
检测出一段声音中的有效语音 起始点和长度 最多3段语音
|
||||
|
||||
短时幅度 短时过零率求取:
|
||||
短时幅度直接累加
|
||||
短时过零率改进为过门限率,设置正负两个绝对值相等的门限。
|
||||
构成门限带,穿过门限带计作过零
|
||||
|
||||
端点判决:
|
||||
1.判断语音起始点,要求能够滤除突发性噪声
|
||||
突发性噪声可以引起短时能量或过零率的数值很高,但是往往不能维持足够长的时间,
|
||||
如门窗的开关,物体的碰撞等引起的噪声,这些都可以通过设定最短时间门限来判别。
|
||||
超过两门限之一或全部,并且持续时间超过有效语音最短时间门限,
|
||||
返回最开始超过门限的时间点,将其标记为有效语音起始点。
|
||||
|
||||
2.判断语音结束点,要求不能丢弃连词中间短暂的有可能被噪声淹没的“寂静段”
|
||||
同时低于两门限,并且持续时间超过无声最长时间门限,
|
||||
返回最开始低于门限的时间点,将其标记为有效语音结束点。
|
||||
*********/
|
||||
void VAD(const u16 *vc, u16 buf_len, valid_tag *valid_voice, atap_tag *atap_arg)
|
||||
{
|
||||
u8 last_sig = 0; // 上次跃出门限带的状态 1:门限带以下;2:门限带以上
|
||||
u8 cur_stus = 0; // 当前语音段状态 0无声段 1前端过渡段 2语音段 3后端过渡段
|
||||
u16 front_duration = 0;//前端过渡段超过门限值持续帧数
|
||||
u16 back_duration = 0;//后端过渡段低于门限值持续帧数
|
||||
u32 h, i;
|
||||
u32 frm_sum; // 短时绝对值和
|
||||
u32 frm_zero; // 短时过零(门限)率
|
||||
u32 a_thl; // 上门限值
|
||||
u32 b_thl; // 下门限值
|
||||
|
||||
u8 valid_con = 0;//语音段计数 最大max_vc_con
|
||||
u32 frm_con = 0; //帧计数
|
||||
|
||||
a_thl = atap_arg->mid_val+atap_arg->n_thl;
|
||||
b_thl = atap_arg->mid_val-atap_arg->n_thl;
|
||||
|
||||
for (i = 0; i < max_vc_con; i++) {
|
||||
((valid_tag *)(valid_voice+i))->start = (void *)0;
|
||||
((valid_tag *)(valid_voice+i))->end = (void *)0;
|
||||
}
|
||||
|
||||
for (i = 0; i < (buf_len-FRAME_LEN); i += (FRAME_LEN-frame_mov)) {
|
||||
frm_con++;
|
||||
|
||||
frm_sum = 0;
|
||||
for (h = 0; h < FRAME_LEN; h++)//短时绝对值和
|
||||
frm_sum += (*(vc+i+h) > (atap_arg->mid_val))?(*(vc+i+h)-(atap_arg->mid_val)):((atap_arg->mid_val)-*(vc+i+h));
|
||||
|
||||
frm_zero = 0;
|
||||
for (h = 0; h < (FRAME_LEN-1); h++) {//短时过门限率
|
||||
if (*(vc+i+h) >= a_thl) //大于上门限值
|
||||
last_sig = 2;
|
||||
else if (*(vc+i+h) < b_thl) //小于下门限值
|
||||
last_sig = 1;
|
||||
|
||||
if (*(vc+i+h+1) >= a_thl) {
|
||||
if (last_sig == 1)
|
||||
frm_zero++;
|
||||
} else if (*(vc+i+h+1) < b_thl) {
|
||||
if (last_sig == 2)
|
||||
frm_zero++;
|
||||
}
|
||||
}
|
||||
//USART1_printf("frm_con=%d ",frm_con);
|
||||
//USART1_printf("frm_sum=%d ",frm_sum);
|
||||
//USART1_printf("frm_zero=%d\r\n",frm_zero);
|
||||
|
||||
if ((frm_sum > (atap_arg->s_thl)) || (frm_zero > (atap_arg->z_thl))) {
|
||||
//至少有一个参数超过其门限值
|
||||
// if(frm_sum>(atap_arg->s_thl))
|
||||
// printf("frm_sum ok\n");
|
||||
// else
|
||||
// printf("frm_zero ok\n");
|
||||
if (cur_stus == 0) {//如果当前是无声段
|
||||
cur_stus = 1; //进入前端过渡段
|
||||
front_duration = 1; //前端过渡段持续帧数置1 第一帧
|
||||
} else if (cur_stus == 1) {//当前是前端过渡段
|
||||
front_duration++;
|
||||
if (front_duration >= v_durmin_f) { //前端过渡段帧数超过最短有效语音帧数
|
||||
cur_stus = 2; //进入语音段
|
||||
((valid_tag *)(valid_voice+valid_con))->start = (u16 *)vc+i-((v_durmin_f-1)*(FRAME_LEN-frame_mov));//记录起始帧位置
|
||||
front_duration = 0; //前端过渡段持续帧数置0
|
||||
}
|
||||
} else if (cur_stus == 3) { //如果当前是后端过渡段 两参数都回升到门限值以上
|
||||
back_duration = 0;
|
||||
cur_stus = 2; //记为语音段
|
||||
}
|
||||
} else {//两参数都在门限值以下
|
||||
// printf("frm not ok\n");
|
||||
if (cur_stus == 2) {//当前是语音段
|
||||
cur_stus = 3;//设为后端过渡段
|
||||
back_duration = 1; //前端过渡段持续帧数置1 第一帧
|
||||
} else if (cur_stus == 3) {//当前是后端过渡段
|
||||
back_duration++;
|
||||
if (back_duration >= s_durmax_f) { //后端过渡段帧数超过最长无声帧数
|
||||
cur_stus = 0; //进入无声段
|
||||
((valid_tag *)(valid_voice+valid_con))->end = (u16 *)vc+i-(s_durmax_f*(FRAME_LEN-frame_mov))+FRAME_LEN;//记录结束帧位置
|
||||
valid_con++;
|
||||
if (valid_con == max_vc_con)
|
||||
return;
|
||||
back_duration = 0;
|
||||
}
|
||||
} else if (cur_stus == 1) {//当前是前端过渡段 两参数都回落到门限值以下
|
||||
//持续时间低于语音最短时间门限 视为短时噪声
|
||||
front_duration = 0;
|
||||
cur_stus = 0; //记为无声段
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
u8 VAD2(const u16 *vc, valid_tag *valid_voice, atap_tag *atap_arg)
|
||||
{
|
||||
u8 last_sig = 0; // 上次跃出门限带的状态 1:门限带以下;2:门限带以上
|
||||
static u8 cur_stus; // 当前语音段状态 0无声段 1前端过渡段 2语音段 3后端过渡段
|
||||
static u16 front_duration;//前端过渡段超过门限值持续帧数
|
||||
static u16 back_duration;//后端过渡段低于门限值持续帧数
|
||||
static u8 word_num_tmp;
|
||||
u32 h, i;
|
||||
u32 frm_sum; // 短时绝对值和
|
||||
u32 frm_zero; // 短时过零(门限)率
|
||||
u32 a_thl; // 上门限值
|
||||
u32 b_thl; // 下门限值
|
||||
|
||||
a_thl = atap_arg->mid_val+atap_arg->n_thl;
|
||||
b_thl = atap_arg->mid_val-atap_arg->n_thl;
|
||||
|
||||
frm_sum = 0;
|
||||
for (h = 0; h < FRAME_LEN; h++)//短时绝对值和
|
||||
frm_sum += (*(vc+h) > (atap_arg->mid_val))?(*(vc+h)-(atap_arg->mid_val)):((atap_arg->mid_val)-*(vc+h));
|
||||
|
||||
frm_zero = 0;
|
||||
for (h = 0; h < (FRAME_LEN-1); h++) {//短时过门限率
|
||||
if (*(vc+h) >= a_thl) //大于上门限值
|
||||
last_sig = 2;
|
||||
else if (*(vc+h) < b_thl) //小于下门限值
|
||||
last_sig = 1;
|
||||
|
||||
if (*(vc+h+1) >= a_thl) {
|
||||
if (last_sig == 1)
|
||||
frm_zero++;
|
||||
} else if (*(vc+h+1) < b_thl) {
|
||||
if (last_sig == 2)
|
||||
frm_zero++;
|
||||
}
|
||||
}
|
||||
|
||||
// printf("frm_sum=%d\n",frm_sum);
|
||||
// printf("frm_zero=%d ",frm_zero);
|
||||
|
||||
if (FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n > VcBuf_Len - (FRAME_LEN - frame_mov)) {//over frame length
|
||||
cur_stus = 0; //进入无声段
|
||||
frm_n = 0;
|
||||
printf("I am here\n");
|
||||
return 0;
|
||||
}
|
||||
if ((frm_sum > (atap_arg->s_thl)) || (frm_zero > (atap_arg->z_thl))) {
|
||||
//至少有一个参数超过其门限值
|
||||
if (frm_sum > (atap_arg->s_thl))
|
||||
printf("frm_sum ok\n");
|
||||
else
|
||||
printf("frm_zero ok\n");
|
||||
if (cur_stus == 0) {//如果当前是无声段
|
||||
cur_stus = 1; //进入前端过渡段
|
||||
front_duration = 1; //前端过渡段持续帧数置1 第一帧
|
||||
frm_n = 0;
|
||||
word_num_tmp = 1;
|
||||
for (i = 0; i < FRAME_LEN; i++) //copy the valid data
|
||||
vad_data[i] = vc[i];
|
||||
} else if (cur_stus == 1) { //当前是前端过渡段
|
||||
front_duration++;
|
||||
if (front_duration >= v_durmin_f) {//前端过渡段帧数超过最短有效语音帧数
|
||||
cur_stus = 2; //进入语音段
|
||||
front_duration = 0; //前端过渡段持续帧数置0
|
||||
valid_voice[0].start = (u16 *)vad_data;
|
||||
}
|
||||
for (i = 0; i < FRAME_LEN - frame_mov; i++)//copy the valid data
|
||||
vad_data[FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n + i] = vc[i + frame_mov];
|
||||
frm_n++;
|
||||
if (FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n > VcBuf_Len + frame_mov - FRAME_LEN) {
|
||||
cur_stus = 0; //进入无声段
|
||||
valid_voice[0].end = (u16 *)vad_data + VcBuf_Len;//记录结束帧位置
|
||||
// valid_voice[0].word_num = word_num_tmp;
|
||||
return 1;
|
||||
}
|
||||
} else if (cur_stus == 3) { //如果当前是后端过渡段 两参数都回升到门限值以上
|
||||
if (back_duration > 5)
|
||||
word_num_tmp++;
|
||||
// printf("back_duration = %d\n", back_duration);
|
||||
back_duration = 0;
|
||||
cur_stus = 2; //记为语音段
|
||||
for (i = 0; i < FRAME_LEN - frame_mov; i++)//copy the valid data
|
||||
vad_data[FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n + i] = vc[i + frame_mov];
|
||||
frm_n++;
|
||||
if (FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n > VcBuf_Len + frame_mov - FRAME_LEN) {
|
||||
cur_stus = 0; //进入无声段
|
||||
valid_voice[0].end = (u16 *)vad_data + VcBuf_Len;//记录结束帧位置
|
||||
// valid_voice[0].word_num = word_num_tmp;
|
||||
return 1;
|
||||
}
|
||||
} else if (cur_stus == 2) {
|
||||
for (i = 0; i < FRAME_LEN - frame_mov; i++)//copy the valid data
|
||||
vad_data[FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n + i] = vc[i + frame_mov];
|
||||
frm_n++;
|
||||
if (FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n > VcBuf_Len + frame_mov - FRAME_LEN) {
|
||||
cur_stus = 0; //进入无声段
|
||||
valid_voice[0].end = (u16 *)vad_data + VcBuf_Len;//记录结束帧位置
|
||||
// valid_voice[0].word_num = word_num_tmp;
|
||||
return 1;
|
||||
}
|
||||
|
||||
}
|
||||
} else {//两参数都在门限值以下
|
||||
// printf("frm error\n");
|
||||
if (cur_stus == 2) {//当前是语音段
|
||||
cur_stus = 3;//设为后端过渡段
|
||||
back_duration = 1; //前端过渡段持续帧数置1 第一帧
|
||||
#if 0
|
||||
for (i = 0; i < FRAME_LEN - frame_mov; i++)//copy the valid data
|
||||
vad_data[FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n + i] = vc[i + frame_mov];
|
||||
frm_n++;
|
||||
if (FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n > VcBuf_Len + frame_mov - FRAME_LEN) {
|
||||
cur_stus = 0; //进入无声段
|
||||
valid_voice[0].end = (u16 *)vad_data + VcBuf_Len;//记录结束帧位置
|
||||
// valid_voice[0].word_num = word_num_tmp;
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
} else if (cur_stus == 3) {//当前是后端过渡段
|
||||
back_duration++;
|
||||
#if 0
|
||||
for (i = 0; i < FRAME_LEN - frame_mov; i++)//copy the valid data
|
||||
vad_data[FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n + i] = vc[i + frame_mov];
|
||||
frm_n++;
|
||||
if (FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n > VcBuf_Len + frame_mov - FRAME_LEN) {
|
||||
cur_stus = 0; //进入无声段
|
||||
valid_voice[0].end = (u16 *)vad_data + VcBuf_Len;//记录结束帧位置
|
||||
// valid_voice[0].word_num = word_num_tmp;
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
if (back_duration >= s_durmax_f) {//后端过渡段帧数超过最长无声帧数
|
||||
cur_stus = 0; //进入无声段
|
||||
back_duration = 0;
|
||||
frm_n = frm_n - s_durmax_f;
|
||||
valid_voice[0].end = (u16 *)vad_data+frm_n*(FRAME_LEN-frame_mov)+FRAME_LEN;//记录结束帧位置
|
||||
// valid_voice[0].word_num = word_num_tmp;
|
||||
return 1;
|
||||
}
|
||||
} else if (cur_stus == 1) {//当前是前端过渡段 两参数都回落到门限值以下
|
||||
//持续时间低于语音最短时间门限 视为短时噪声
|
||||
front_duration = 0;
|
||||
cur_stus = 0; //记为无声段
|
||||
frm_n = 0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
42
libraries/Maix_Speech_Recognition/src/util/VAD.h
Normal file
42
libraries/Maix_Speech_Recognition/src/util/VAD.h
Normal file
@@ -0,0 +1,42 @@
|
||||
#ifndef _VAD_H
|
||||
#define _VAD_H
|
||||
|
||||
#include "g_def.h"
|
||||
|
||||
#define max_vc_con 3 //VAD最多检测的语音段数
|
||||
#define frame_time 20 // 每帧时间长度 单位ms
|
||||
#define frame_mov_t 10 // 帧移
|
||||
#define FRAME_LEN (frame_time*fs/1000) // 帧长
|
||||
#define frame_mov (frame_mov_t *fs/1000) // 帧移,相邻帧交叠部分
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
#pragma pack(1)
|
||||
typedef struct {
|
||||
u32 mid_val; //语音段中值 相当于有符号的0值 用于短时过零率计算
|
||||
u16 n_thl; //噪声阈值,用于短时过零率计算
|
||||
u16 z_thl; //短时过零率阈值,超过此阈值,视为进入过渡段。
|
||||
u32 s_thl; //短时累加和阈值,超过此阈值,视为进入过渡段。
|
||||
} atap_tag; //自适应参数
|
||||
#pragma pack()
|
||||
|
||||
//#pragma pack(4)
|
||||
typedef struct {
|
||||
u16 *start; //起始点
|
||||
u16 *end; //结束点
|
||||
// u8 word_num;
|
||||
} valid_tag; //有效语音段
|
||||
//#pragma pack()
|
||||
|
||||
void noise_atap(const u16 *noise, u16 n_len, atap_tag *atap);
|
||||
void VAD(const u16 *vc, u16 buf_len, valid_tag *valid_voice, atap_tag *atap_arg);
|
||||
u8 VAD2(const u16 *vc, valid_tag *valid_voice, atap_tag *atap_arg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
105
libraries/Maix_Speech_Recognition/src/util/flash.c
Normal file
105
libraries/Maix_Speech_Recognition/src/util/flash.c
Normal file
@@ -0,0 +1,105 @@
|
||||
/* 适用于STM32F103VE
|
||||
* 全部falsh 512KB 256页 每页2KB
|
||||
|
||||
每个语音特征模板占用4KB 采用冗余模板 每个语音指令4个特征模板
|
||||
初步设计设定20个语音指令 共占用320KB
|
||||
|
||||
flash最后320KB用于存储语音特征模板
|
||||
编译器需设置 以免存储区被代码占用
|
||||
烧写程序时也不能擦除存储区 选擦除需要的页
|
||||
*/
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "g_def.h"
|
||||
#include "flash.h"
|
||||
#include "uarths.h"
|
||||
#include "MFCC.h"
|
||||
//#include "voice_modle.h"
|
||||
|
||||
v_ftr_tag ftr_save[20 * 4];
|
||||
|
||||
u8 save_ftr_mdl(v_ftr_tag *ftr, u32 addr)
|
||||
{
|
||||
// u32 ftr_size;
|
||||
|
||||
addr = addr / size_per_ftr;
|
||||
|
||||
if (addr > 40) {
|
||||
printf("flash addr error");
|
||||
return Flash_Fail;
|
||||
}
|
||||
ftr->save_sign = save_mask;
|
||||
ftr_save[addr] = *ftr;
|
||||
|
||||
// ftr_size=2*mfcc_num*ftr->frm_num;
|
||||
|
||||
return Flash_Success;
|
||||
}
|
||||
#if 0
|
||||
void init_voice_mdl(void)
|
||||
{
|
||||
uint16_t i, j, comm;
|
||||
|
||||
for (comm = 0; comm < 4; comm++) {
|
||||
for (j = 0; j < 4; j++) {
|
||||
ftr_save[comm*4+j].save_sign = save_mask;
|
||||
ftr_save[comm*4+j].frm_num = mdl_fram_num[comm*4+j];
|
||||
// ftr_save[comm*4+j].word_num = 2;
|
||||
}
|
||||
}
|
||||
//
|
||||
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
|
||||
ftr_save[0].mfcc_dat[i] = start1[i];
|
||||
|
||||
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
|
||||
ftr_save[1].mfcc_dat[i] = start2[i];
|
||||
|
||||
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
|
||||
ftr_save[2].mfcc_dat[i] = start3[i];
|
||||
|
||||
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
|
||||
ftr_save[3].mfcc_dat[i] = start4[i];
|
||||
|
||||
|
||||
//
|
||||
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
|
||||
ftr_save[4].mfcc_dat[i] = pause1[i];
|
||||
|
||||
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
|
||||
ftr_save[5].mfcc_dat[i] = pause2[i];
|
||||
|
||||
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
|
||||
ftr_save[6].mfcc_dat[i] = pause3[i];
|
||||
|
||||
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
|
||||
ftr_save[7].mfcc_dat[i] = pause4[i];
|
||||
|
||||
//
|
||||
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
|
||||
ftr_save[8].mfcc_dat[i] = cancle1[i];
|
||||
|
||||
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
|
||||
ftr_save[9].mfcc_dat[i] = cancle2[i];
|
||||
|
||||
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
|
||||
ftr_save[10].mfcc_dat[i] = cancle3[i];
|
||||
|
||||
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
|
||||
ftr_save[11].mfcc_dat[i] = cancle4[i];
|
||||
|
||||
//
|
||||
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
|
||||
ftr_save[12].mfcc_dat[i] = confirm1[i];
|
||||
|
||||
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
|
||||
ftr_save[13].mfcc_dat[i] = confirm2[i];
|
||||
|
||||
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
|
||||
ftr_save[14].mfcc_dat[i] = confirm3[i];
|
||||
|
||||
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
|
||||
ftr_save[15].mfcc_dat[i] = confirm4[i];
|
||||
}
|
||||
#endif
|
||||
39
libraries/Maix_Speech_Recognition/src/util/flash.h
Normal file
39
libraries/Maix_Speech_Recognition/src/util/flash.h
Normal file
@@ -0,0 +1,39 @@
|
||||
#ifndef __FLASH_H
|
||||
#define __FLASH_H
|
||||
|
||||
#include "g_def.h"
|
||||
#include "MFCC.h"
|
||||
|
||||
#define FLASH_PAGE_SIZE 2048
|
||||
|
||||
#define Flash_Fail 3
|
||||
#define Flash_Success 0
|
||||
|
||||
#define save_mask 12345
|
||||
|
||||
#define size_per_ftr (4*1024)
|
||||
#define page_per_ftr (size_per_ftr/FLASH_PAGE_SIZE)
|
||||
#define ftr_per_comm 4
|
||||
#define size_per_comm (ftr_per_comm*size_per_ftr)
|
||||
#define comm_num 10
|
||||
#define ftr_total_size (size_per_comm*comm_num)
|
||||
//#define ftr_end_addr 0x8080000
|
||||
#define ftr_end_addr (size_per_ftr * ftr_per_comm * comm_num)
|
||||
#define ftr_start_addr 0 //(ftr_end_addr-ftr_total_size)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
u8 save_ftr_mdl(v_ftr_tag *ftr, u32 addr);
|
||||
|
||||
//void init_voice_mdl(void);
|
||||
extern v_ftr_tag ftr_save[20 * 4];
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
13
libraries/Maix_Speech_Recognition/src/util/g_def.h
Normal file
13
libraries/Maix_Speech_Recognition/src/util/g_def.h
Normal file
@@ -0,0 +1,13 @@
|
||||
#ifndef _G_DEF_H_
|
||||
#define _G_DEF_H_
|
||||
|
||||
typedef uint32_t u32;
|
||||
typedef uint16_t u16;
|
||||
typedef uint8_t u8;
|
||||
|
||||
typedef int32_t s32;
|
||||
typedef int16_t s16;
|
||||
typedef int8_t s8;
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user