add speech recognition lib

This commit is contained in:
Bigbits
2019-05-15 13:00:35 +08:00
parent 6d6add4a05
commit 8552b652c3
19 changed files with 3347 additions and 0 deletions

View File

@@ -0,0 +1,20 @@
#include "Maix_Speech_Recognition.h"
SpeechRecognizer rec;
void setup()
{
rec.begin();
Serial.begin(115200);
Serial.println("start rec...");
if( rec.record(0, 0) == 0) //keyword_num, model_num
{
rec.print_model(0, 0);
}
else
Serial.println("rec failed");
}
void loop()
{
}

View File

@@ -0,0 +1,71 @@
#include "Maix_Speech_Recognition.h"
#include "voice_model.h"
SpeechRecognizer rec;
void setup()
{
pinMode(LED_RED, OUTPUT);
pinMode(LED_BLUE, OUTPUT);
pinMode(LED_GREEN, OUTPUT);
rec.begin();
Serial.begin(115200);
Serial.println("init model...");
rec.addVoiceModel(0, 0, hongse_0, fram_num_hongse_0); //36
rec.addVoiceModel(0, 1, hongse_1, fram_num_hongse_1); //20
rec.addVoiceModel(0, 2, hongse_2, fram_num_hongse_2); //23
rec.addVoiceModel(0, 3, hongse_3, fram_num_hongse_3); //21
rec.addVoiceModel(1, 0, lvse_0, fram_num_lvse_0); //20
rec.addVoiceModel(1, 1, lvse_1, fram_num_lvse_1); //37
rec.addVoiceModel(1, 2, lvse_2, fram_num_lvse_2); //19
rec.addVoiceModel(1, 3, lvse_3, fram_num_lvse_3); //26
rec.addVoiceModel(2, 0, lanse_0, fram_num_lanse_0); //19
rec.addVoiceModel(2, 1, lanse_1, fram_num_lanse_1); //21
rec.addVoiceModel(2, 2, lanse_2, fram_num_lanse_2); //28
rec.addVoiceModel(2, 3, lanse_3, fram_num_lanse_3); //22
rec.addVoiceModel(3, 0, guandeng_0, fram_num_guandeng_0); //26
rec.addVoiceModel(3, 1, guandeng_1, fram_num_guandeng_1); //29
rec.addVoiceModel(3, 2, guandeng_2, fram_num_guandeng_2); //33
rec.addVoiceModel(3, 3, guandeng_3, fram_num_guandeng_3); //29
Serial.println("init model ok!");
}
void loop()
{
int res;
res = rec.recognize();
Serial.printf("res : %d ", res);
if (res > 0){
switch (res)
{
case 1:
digitalWrite(LED_RED, LOW); //power on red led
digitalWrite(LED_GREEN, HIGH);
digitalWrite(LED_BLUE, HIGH);
Serial.println("rec : hongse ");
break;
case 2:
digitalWrite(LED_GREEN, LOW); //power on green led
digitalWrite(LED_RED, HIGH);
digitalWrite(LED_BLUE, HIGH);
Serial.println("rec : lvse ");
break;
case 3:
digitalWrite(LED_BLUE, LOW); //power on blue led
digitalWrite(LED_RED, HIGH);
digitalWrite(LED_GREEN, HIGH);
Serial.println("rec : lanse ");
break;
case 4:
digitalWrite(LED_RED, HIGH);
digitalWrite(LED_GREEN, HIGH);
digitalWrite(LED_BLUE, HIGH); //power off all leds
Serial.println("rec : guandeng ");
default:
break;
}
}else
{
Serial.println("recognize failed.");
}
delay(1000);
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,9 @@
name=Maix_Speech_Recognition
version=1.0
author=BigBits
maintainer=BigBits<bigbits@hackx.cc>
sentence=Maix Boards Speech Recognition Lib
paragraph=Maix Boards Speech Recognition Lib
category=Uncategorized
url=
architectures=k210

View File

@@ -0,0 +1,385 @@
#include "Maix_Speech_Recognition.h"
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "sysctl.h"
#include "plic.h"
#include "uarths.h"
#include "util/g_def.h"
#include "i2s.h"
#include "fpioa.h"
#include "util/VAD.h"
#include "util/MFCC.h"
#include "util/DTW.h"
#include "util/flash.h"
#include "util/ADC.h"
#define USART1_printf printf
uint16_t VcBuf[atap_len];
atap_tag atap_arg;
valid_tag valid_voice[max_vc_con];
v_ftr_tag ftr;
v_ftr_tag ftr_temp;
v_ftr_tag ftr_mdl_temp[10];
v_ftr_tag *pftr_mdl_temp[10];
#define save_ok 0
#define VAD_fail 1
#define MFCC_fail 2
#define Flash_fail 3
#define FFT_N 512
uint16_t rx_buf[FRAME_LEN];
uint32_t g_rx_dma_buf[FRAME_LEN * 2];
uint64_t fft_out_data[FFT_N / 2];
volatile uint32_t g_index;
volatile uint8_t uart_rec_flag;
volatile uint32_t receive_char;
volatile uint8_t i2s_rec_flag;
volatile uint8_t i2s_start_flag = 0;
int i2s_dma_irq(void *ctx)
{
uint32_t i;
if(i2s_start_flag)
{
int16_t s_tmp;
if(g_index)
{
i2s_receive_data_dma(I2S_DEVICE_0, &g_rx_dma_buf[g_index], frame_mov * 2, DMAC_CHANNEL3);
g_index = 0;
for(i = 0; i < frame_mov; i++)
{
s_tmp = (int16_t)(g_rx_dma_buf[2 * i] & 0xffff); //g_rx_dma_buf[2 * i + 1] Low left
rx_buf[i] = s_tmp + 32768;
}
i2s_rec_flag = 1;
}
else
{
i2s_receive_data_dma(I2S_DEVICE_0, &g_rx_dma_buf[0], frame_mov * 2, DMAC_CHANNEL3);
g_index = frame_mov * 2;
for(i = frame_mov; i < frame_mov * 2; i++)
{
s_tmp = (int16_t)(g_rx_dma_buf[2 * i] & 0xffff);//g_rx_dma_buf[2 * i + 1] Low left
rx_buf[i] = s_tmp + 32768;
}
i2s_rec_flag = 2;
}
}
else
{
i2s_receive_data_dma(I2S_DEVICE_0, &g_rx_dma_buf[0], frame_mov * 2, DMAC_CHANNEL3);
g_index = frame_mov * 2;
}
return 0;
}
SpeechRecognizer::SpeechRecognizer(){
}
SpeechRecognizer::~SpeechRecognizer(){
}
int
SpeechRecognizer::begin()
{
//io_mux_init
fpioa_set_function(20, FUNC_I2S0_IN_D0);
fpioa_set_function(18, FUNC_I2S0_SCLK);
fpioa_set_function(19, FUNC_I2S0_WS);
//i2s init
i2s_init(I2S_DEVICE_0, I2S_RECEIVER, 0x3);
i2s_rx_channel_config(I2S_DEVICE_0, I2S_CHANNEL_0,
RESOLUTION_16_BIT, SCLK_CYCLES_32,
TRIGGER_LEVEL_4, STANDARD_MODE);
i2s_set_sample_rate(I2S_DEVICE_0, 8000);
dmac_init();
dmac_set_irq(DMAC_CHANNEL3, i2s_dma_irq, NULL, 3);
i2s_receive_data_dma(I2S_DEVICE_0, &g_rx_dma_buf[0], frame_mov * 2, DMAC_CHANNEL3);
/* Enable the machine interrupt */
sysctl_enable_irq();
return 0;
}
int
SpeechRecognizer::record(uint8_t keyword_num, uint8_t model_num)
{
if (keyword_num > 10) return -1;
if (model_num > 4) return -2;
comm = keyword_num;
uint8_t prc_count = model_num;
uint32_t addr = 0;
g_index = 0;
i2s_rec_flag = 0;
i2s_start_flag = 1;
addr = ftr_start_addr + comm * size_per_comm + prc_count * size_per_ftr;
if (save_mdl(VcBuf, addr) == save_ok) {
return 0;
}else{
return -3;
}
}
int
SpeechRecognizer::recognize()
{
u8 res;
u32 dis;
u32 recg_count = 0;
g_index = 0;
i2s_rec_flag = 0;
i2s_start_flag = 1;
res = spch_recg(VcBuf, &dis);
if(dis != dis_err)
return res;
else
return -1;
}
int
SpeechRecognizer::addVoiceModel(uint8_t keyword_num, uint8_t model_num, const int16_t *voice_model, uint16_t frame_num)
{
ftr_save[keyword_num * 4 + model_num].save_sign = save_mask;
ftr_save[keyword_num * 4 + model_num].frm_num = frame_num;
for (int i = 0; i < (vv_frm_max * mfcc_num); i++)
ftr_save[keyword_num * 4 + model_num].mfcc_dat[i] = voice_model[i];
return 0;
}
int
SpeechRecognizer::print_model(uint8_t keyword_num, uint8_t model_num)
{
printf("frm_num=%d\n", ftr_save[keyword_num*4 + model_num].frm_num);
for (int i = 0; i < (vv_frm_max * mfcc_num); i++) {
if (((i + 1) % 35) == 0)
printf("%d,\n", ftr_save[keyword_num*4 + model_num].mfcc_dat[i]);
else
printf("%d, ", ftr_save[keyword_num*4 + model_num].mfcc_dat[i]);
}
printf("\nprint model ok!\n");
return 0;
}
uint8_t SpeechRecognizer::save_mdl(uint16_t *v_dat, uint32_t addr)
{
u16 i, num;
u16 frame_index;
get_noise1:
frame_index = 0;
num = atap_len / frame_mov;
//wait for finish
while (1) {
while (i2s_rec_flag == 0)
continue;
if (i2s_rec_flag == 1) {
for (i = 0; i < frame_mov; i++)
v_dat[frame_mov * frame_index + i] = rx_buf[i];
} else {
for (i = 0; i < frame_mov; i++)
v_dat[frame_mov * frame_index + i] = rx_buf[i + frame_mov];
}
i2s_rec_flag = 0;
frame_index++;
if (frame_index >= num)
break;
}
// for(i = 0; i < atap_len; i++)
// printf("noise: %d\n", v_dat[i]);
noise_atap(v_dat, atap_len, &atap_arg);
if (atap_arg.s_thl > 10000) {
printf("get noise again...\n");
goto get_noise1;
}
printf("speeking...\n");
//wait for finish
while (i2s_rec_flag == 0)
continue;
if (i2s_rec_flag == 1) {
for (i = 0; i < frame_mov; i++)
v_dat[i + frame_mov] = rx_buf[i];
} else {
for (i = 0; i < frame_mov; i++)
v_dat[i + frame_mov] = rx_buf[i + frame_mov];
}
i2s_rec_flag = 0;
while (1) {
while (i2s_rec_flag == 0)
continue;
if (i2s_rec_flag == 1) {
for (i = 0; i < frame_mov; i++) {
v_dat[i] = v_dat[i + frame_mov];
v_dat[i + frame_mov] = rx_buf[i];
}
} else {
for (i = 0; i < frame_mov; i++) {
v_dat[i] = v_dat[i + frame_mov];
v_dat[i + frame_mov] = rx_buf[i + frame_mov];
}
}
i2s_rec_flag = 0;
if (VAD2(v_dat, valid_voice, &atap_arg) == 1)
break;
if (receive_char == 's')
return MFCC_fail;
}
// if (valid_voice[0].end == ((void *)0)) {
// printf("VAD_fail\n");
// return VAD_fail;
// }
get_mfcc(&(valid_voice[0]), &ftr, &atap_arg);
if (ftr.frm_num == 0) {
//printf("MFCC_fail\n");
return MFCC_fail;
}
// ftr.word_num = valid_voice[0].word_num;
return save_ftr_mdl(&ftr, addr);
// ftr_mdl_temp[addr] = ftr;
// return save_ok;
}
uint8_t SpeechRecognizer::spch_recg(uint16_t *v_dat, uint32_t *mtch_dis)
{
u16 i;
u32 ftr_addr;
u32 min_dis;
u16 min_comm;
u32 cur_dis;
v_ftr_tag *ftr_mdl;
u16 num;
u16 frame_index;
uint32_t cycle0, cycle1;
get_noise2:
frame_index = 0;
num = atap_len / frame_mov;
//wait for finish
i2s_rec_flag = 0;
while (1) {
while (i2s_rec_flag == 0)
continue;
if (i2s_rec_flag == 1) {
for (i = 0; i < frame_mov; i++)
v_dat[frame_mov * frame_index + i] = rx_buf[i];
} else {
for (i = 0; i < frame_mov; i++)
v_dat[frame_mov * frame_index + i] = rx_buf[i + frame_mov];
}
i2s_rec_flag = 0;
frame_index++;
if (frame_index >= num)
break;
}
noise_atap(v_dat, atap_len, &atap_arg);
if (atap_arg.s_thl > 10000) {
printf("get noise again...\n");
goto get_noise2;
}
printf("speeking...\n");
//wait for finish
while (i2s_rec_flag == 0)
continue;
if (i2s_rec_flag == 1) {
for (i = 0; i < frame_mov; i++)
v_dat[i + frame_mov] = rx_buf[i];
} else {
for (i = 0; i < frame_mov; i++)
v_dat[i + frame_mov] = rx_buf[i + frame_mov];
}
i2s_rec_flag = 0;
while (1) {
while (i2s_rec_flag == 0)
continue;
if (i2s_rec_flag == 1) {
for (i = 0; i < frame_mov; i++) {
v_dat[i] = v_dat[i + frame_mov];
v_dat[i + frame_mov] = rx_buf[i];
}
} else {
for (i = 0; i < frame_mov; i++) {
v_dat[i] = v_dat[i + frame_mov];
v_dat[i + frame_mov] = rx_buf[i + frame_mov];
}
}
i2s_rec_flag = 0;
if (VAD2(v_dat, valid_voice, &atap_arg) == 1)
break;
if (receive_char == 's') {
*mtch_dis = dis_err;
printf("send 'c' to start\n");
return 0;
}
}
printf("vad ok\n");
// if (valid_voice[0].end == ((void *)0)) {
// *mtch_dis=dis_err;
// USART1_printf("VAD fail ");
// return (void *)0;
// }
get_mfcc(&(valid_voice[0]), &ftr, &atap_arg);
if (ftr.frm_num == 0) {
*mtch_dis = dis_err;
USART1_printf("MFCC fail ");
return 0;
}
// for (i = 0; i < ftr.frm_num * mfcc_num; i++) {
// if (i % 12 == 0)
// printf("\n");
// printf("%d ", ftr.mfcc_dat[i]);
// }
// ftr.word_num = valid_voice[0].word_num;
printf("mfcc ok\n");
i = 0;
min_comm = 0;
min_dis = dis_max;
cycle0 = read_csr(mcycle);
for (ftr_addr = ftr_start_addr; ftr_addr < ftr_end_addr; ftr_addr += size_per_ftr) {
// ftr_mdl=(v_ftr_tag*)ftr_addr;
ftr_mdl = (v_ftr_tag *)(&ftr_save[ftr_addr / size_per_ftr]);
cur_dis = ((ftr_mdl->save_sign) == save_mask) ? dtw(ftr_mdl, &ftr) : dis_err;
if ((ftr_mdl->save_sign) == save_mask) {
USART1_printf("no. %d, frm_num = %d, save_mask=%d", i + 1, ftr_mdl->frm_num, ftr_mdl->save_sign);
USART1_printf("cur_dis=%d\n", cur_dis);
}
if (cur_dis < min_dis) {
min_dis = cur_dis;
min_comm = i + 1;
}
i++;
}
cycle1 = read_csr(mcycle) - cycle0;
printf("[INFO] recg cycle = 0x%08x\n", cycle1);
if (min_comm % 4)
min_comm = min_comm / ftr_per_comm + 1;
else
min_comm = min_comm / ftr_per_comm;
//USART1_printf("recg end ");
*mtch_dis = min_dis;
return (u8)min_comm;//(commstr[min_comm].str);
}

View File

@@ -0,0 +1,42 @@
#ifndef __MAIX_SPEECH_RECOGNITION_H
#define __MAIX_SPEECH_RECOGNITION_H
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "sysctl.h"
#include "plic.h"
#include "uarths.h"
#include "util/g_def.h"
#include "i2s.h"
#include "util/VAD.h"
#include "util/MFCC.h"
#include "util/DTW.h"
#include "util/flash.h"
#include "util/ADC.h"
class SpeechRecognizer{
public:
SpeechRecognizer();
~SpeechRecognizer();
int begin(); //初始化i2s
int record(uint8_t keyword_num, uint8_t model_num); //记录关键词
int recognize(); //识别,返回关键词号
int addVoiceModel(uint8_t keyword_num, uint8_t model_num, const int16_t *voice_model, uint16_t frame_num);
int print_model(uint8_t keyword_num, uint8_t model_num);
private:
uint8_t save_mdl(uint16_t *v_dat, uint32_t addr);
uint8_t spch_recg(uint16_t *v_dat, uint32_t *mtch_dis);
private:
uint8_t comm; //关键词号
};
#endif

View File

@@ -0,0 +1,22 @@
#ifndef _ADC_H
#define _ADC_H
#define fs 8000 //ADC采样率 Hz 8000
#define voice_len 3000 //录音时间长度 单位ms
#define VcBuf_Len ((fs/1000)*voice_len) //语音缓存区长度 单位点数 每个采样点16位
#define atap_len_t 300 //背景噪音自适应时间长度 ms
#define atap_len ((fs/1000)*atap_len_t) //背景噪音自适应长度
#ifdef __cplusplus
extern "C" {
#endif
void ADC_DMA_Init(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,434 @@
/******* DTW.C ********/
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "g_def.h"
#include "ADC.h"
#include "VAD.h"
#include "MFCC.h"
#include "DTW.h"
/*
* DTW算法 通过局部优化的方法实现加权距离总和最小
时间规整函数:
C={c(1),c(2),…,c(N)}
N为路径长度c(n)=(i(n),j(n))表示第n个匹配点是有参考模板的
第i(n)个特征矢量与待测模板的第j(n)个特征矢量构成的匹配点对,两
者之间的距离d(x(i(n)),y(j(n)))称为匹配距离。
时间规整函数满足一下约束:
1.单调性,规整函数单调增加。
2.起点终点约束,起点对起点,终点对终点。
3.连续性,不允许跳过任何一点。
4.最大规整量不超过某一极限值。|i(n)-j(n)|<M,M为窗宽。规整
函数所处的区域位于平行四边形内。局部路径约束用于限制当第n步
为(i(n),j(n))时,前几步存在几种可能的路径。
DTW步骤
1.初始化。令i(0)=j(0)=0,i(N)=in_frm_num,j(N)=mdl_frm_num.
确定一个平行四边形,有两个位于(0,0)和(in_frm_num,mdl_frm_num)的顶点,相邻斜边斜
率分别是2和1/2.规整函数不可超出此平行四边形。
2.递推求累计距离。
若输入特征与特征模板的帧数差别过大,直接将匹配距离设为最大
frm_in_num<(frm_mdl_num/2)||frm_in_num>(frm_mdl_num*2)
*/
int64_t avr_in[mfcc_num];
int64_t avr_mdl[mfcc_num];
int64_t standard_in[mfcc_num];
int64_t standard_mdl[mfcc_num];
int dtw_data[vv_frm_max*vv_frm_max];
struct pointOritation//节点方向用来回溯每个W点
{
int frontI, frontJ;
};
int g[vv_frm_max][vv_frm_max];
struct pointOritation pOritation[vv_frm_max][vv_frm_max];//用来存放
void gArray(int *p, int n, int m, int *g, struct pointOritation *pr)
{
int i = 0, j = 0;
*g = (*p) * 2;//起始点(最左上角的点)
for (i = 1; i < m; i++) {//最上面一横
*(g + i) = *(g + i - 1) + *(p + i);
(pr + i)->frontI = 0;
(pr + i)->frontJ = i - 1;
}
for (i = 1; i < n; i++) {//最左边的一竖
*(g+i*m+0) = *(g+(i-1)*m+0)+(*(p+i*m+0));
(pr+i*m+0)->frontI = i-1;
(pr+i*m+0)->frontJ = 0;
}
//计算剩余网格的G值
for (i = 1; i < n; i++) {
for (j = 1; j < m; j++) {
int left, up, incline;
left = *(g + i*m+j-1) + *(p + i * m + j);
up = *(g + (i - 1) * m + j) + *(p + i * m + j);
incline = *(g + (i - 1) * m + j - 1) + (*(p + i * m + j)) * 2;
//从左、上、斜三个方向选出最小的
int min = left;
*(g + i * m + j) = min;
(pr + i * m + j)->frontI = i;
(pr + i * m + j)->frontJ = j - 1;
if (min > up) {
min = up;
*(g + i * m + j) = min;
(pr + i * m + j)->frontI = i - 1;
(pr + i * m + j)->frontJ = j;
}
if (min > incline) {
min = incline;
*(g + i * m + j) = min;
(pr + i * m + j)->frontI = i - 1;
(pr + i * m + j)->frontJ = j - 1;
}
}
}
#if 0
//输出G数组
for (i = 0; i < n; i++) {
for (j = 0; j < m; j++)
printf("%d, ", *(g+i*m+j));
printf("\n");
}
//输出方向数组
for (i = 0; i < n; i++) {
for (j = 0; j < m; j++)
printf("(%d,%d), ", (pr+i*m+j)->frontI, (pr+i*m+j)->frontJ);
printf("\n");
}
#endif
}
int printPath(struct pointOritation *po, int n, int m, int *g)
{
//从最后一个点向前输出路径节点
int i = n-1, j = m - 1;
int step = 0;
while (1) {
int ii = (po + i * m + j)->frontI, jj = (po + i * m + j)->frontJ;
if (i == 0 && j == 0)
break;
// printf("(%d,%d):%d\n",i,j,*(g+i*m+j));
i = ii;
j = jj;
step++;
}
// printf("distance1 = %d\n", *(g+(n-1)*m + m -1) / step);
return step;
//printf("distance = %d\n", *(g+(n-1)*m + m -1) / step);
}
/*
* 获取两个特征矢量之间的距离
参数
frm_ftr1 特征矢量1
frm_ftr2 特征矢量2
返回值
dis 矢量距离
*/
u32 get_dis(s16 *frm_ftr1, s16 *frm_ftr2)
{
u8 i;
#if 0
#if 1
u32 dis;
s32 dif; //两矢量相同维度上的差值
dis = 0;
for (i = 0; i < mfcc_num; i++) {
//USART1_printf("dis=%d ",dis);
dif = frm_ftr1[i]-frm_ftr2[i];
dis += (dif*dif);
}
//USART1_printf("dis=%d ",dis);
dis = sqrtf(dis);
//USART1_printf("%d\r\n",dis);
return dis;
#else
u32 dis;
s32 dif;
s32 avr;
dis = 0;
for (i = 0; i < mfcc_num; i++) {
//USART1_printf("dis=%d ",dis);
avr = (frm_ftr1[i]+frm_ftr2[i]) / 2;
dif = sqrtf((frm_ftr1[i] - avr) * (frm_ftr1[i] - avr) + (frm_ftr2[i] - avr) * (frm_ftr2[i] - avr));
dif = (frm_ftr1[i]-frm_ftr2[i]) * 100 / dif;
dis += (dif*dif);
}
//USART1_printf("dis=%d ",dis);
dis = sqrtf(dis);
//USART1_printf("%d\r\n",dis);
return dis;
#endif
#else
#if 1
int64_t dif = 0;
int64_t dif_a = 0;
int64_t dif_b = 0;
int32_t dif_r = 0;
for (i = 0; i < mfcc_num; i++) {
dif += (frm_ftr1[i]*frm_ftr2[i]);
dif_a += (frm_ftr1[i] * frm_ftr1[i]);
dif_b += (frm_ftr2[i] * frm_ftr2[i]);
}
dif_r = 1000 - (dif * 1000 / sqrt(dif_a * dif_b));
return dif_r;
#else
int64_t dif = 0;
int64_t dif_a = 0;
int64_t dif_b = 0;
int32_t dif_r = 0;
for (i = 0; i < mfcc_num; i++) {
dif += ((frm_ftr1[i] - avr_mdl[i]) * (frm_ftr2[i] - avr_in[i]));
dif_a += ((frm_ftr1[i] - avr_mdl[i]) * (frm_ftr1[i] - avr_mdl[i]));
dif_b += ((frm_ftr2[i] - avr_in[i]) * (frm_ftr2[i] - avr_in[i]));
}
dif_r = 1000 - (dif * 1000 / (sqrt(dif_a * dif_b)));
return dif_r;
#endif
#endif
}
//平行四边形两外两顶点 X坐标值
static u16 X1; //上边交点
static u16 X2; //下边交点
static int in_frm_num; //输入特征帧数
static int mdl_frm_num;//特征模板帧数
#define ins 0
#define outs 1
/*
* 范围控制
*/
u8 dtw_limit(u16 x, u16 y)
{
if (x < X1) {
if (y >= ((2*x)+2))
return outs;
} else {
if ((2*y+in_frm_num-2*mdl_frm_num) >= (x+4))
return outs;
}
if (x < X2) {
if ((2*y+2) <= x)
return outs;
} else {
if ((y+4) <= (2*x+mdl_frm_num-2*in_frm_num))
return outs;
}
return ins;
}
/*
* DTW 动态时间规整
参数
ftr_in :输入特征值
ftr_mdl :特征模版
返回值
dis :累计匹配距离
*/
u32 dtw(v_ftr_tag *ftr_in, v_ftr_tag *frt_mdl)
{
u32 dis;
// u16 x, y;
u16 step;
s16 *in;
s16 *mdl;
// u32 d_right_up, right, right_up; //up,
// u32 min;
int i, j;
in_frm_num = ftr_in->frm_num;
mdl_frm_num = frt_mdl->frm_num;
if ((in_frm_num > (mdl_frm_num*3)) || ((3*in_frm_num) < mdl_frm_num)) {
//USART1_printf("in_frm_num=%d mdl_frm_num=%d\r\n", in_frm_num,mdl_frm_num);
return dis_err;
} else {
// 计算约束平行四边形顶点值
X1 = (2*mdl_frm_num-in_frm_num)/3;
X2 = (4*in_frm_num-2*mdl_frm_num)/3;
in = ftr_in->mfcc_dat;
mdl = frt_mdl->mfcc_dat;
#if 0
for (j = 0; j < mfcc_num; j++) {
avr_in[j] = 0;
avr_mdl[j] = 0;
}
for (j = 0; j < mfcc_num; j++)
for (i = 0; i < in_frm_num; i++)
avr_in[j] += in[mfcc_num * i + j];
for (j = 0; j < mfcc_num; j++)
avr_in[j] = avr_in[j] / in_frm_num;
//
for (j = 0; j < mfcc_num; j++)
for (i = 0; i < mdl_frm_num; i++)
avr_mdl[j] += mdl[mfcc_num * i + j];
for (j = 0; j < mfcc_num; j++)
avr_mdl[j] = avr_mdl[j] / mdl_frm_num;
dis = get_dis(in, mdl);
x = 1;
y = 1;
step = 1;
#endif
#if 0
for (i = 0; i < in_frm_num; i++) {
for (j = 0; j < mdl_frm_num; j++) {
printf("%d,", get_dis(mdl, in));
mdl += mfcc_num;
}
mdl = frt_mdl->mfcc_dat;
in += mfcc_num;
printf("\n");
}
in = ftr_in->mfcc_dat;
mdl = frt_mdl->mfcc_dat;
#endif
for (i = 0; i < in_frm_num; i++) {
for (j = 0; j < mdl_frm_num; j++) {
//dtw_data[i][j] = get_dis(in + (i * mfcc_num), mdl + (j * mfcc_num));
//dtw_data[i][j] = get_dis(in, mdl);
*(dtw_data+i*mdl_frm_num+j) = get_dis(in, mdl);
// printf("%d,", dtw_data[i*mdl_frm_num+j]);
mdl += mfcc_num;
}
//printf("\n");
mdl = frt_mdl->mfcc_dat;
in += mfcc_num;
}
gArray(dtw_data, in_frm_num, mdl_frm_num, *g, *pOritation);
step = printPath(*pOritation, in_frm_num, mdl_frm_num, *g);
//printf("step=%d\r\n",step);
dis = *((int *)g+(in_frm_num-1)*mdl_frm_num + mdl_frm_num - 1);
// printf("dis=%d step=%d dis/step=%d\r\n",dis,step,dis/step);
}
return (dis/step); //步长归一化
}
void get_mean(s16 *frm_ftr1, s16 *frm_ftr2, s16 *mean)
{
u8 i;
for (i = 0; i < mfcc_num; i++) {
mean[i] = (frm_ftr1[i]+frm_ftr2[i])/2;
// printf("x=%d y=%d ", frm_ftr1[i], frm_ftr2[i]);
// printf("mean=%d\r\n", mean[i]);
}
}
/*
* 从两特征矢量获取特征模板
参数
ftr_in1 :输入特征值
ftr_in2 :输入特征值
ftr_mdl :特征模版
返回值
dis :累计匹配距离
*/
u32 get_mdl(v_ftr_tag *ftr_in1, v_ftr_tag *ftr_in2, v_ftr_tag *ftr_mdl)
{
u32 dis;
u16 x, y;
u16 step;
s16 *in1;
s16 *in2;
s16 *mdl;
u32 right, right_up, d_right_up;//up,
u32 min;
in_frm_num = ftr_in1->frm_num;
mdl_frm_num = ftr_in2->frm_num;
if ((in_frm_num > (mdl_frm_num*2)) || ((2*in_frm_num) < mdl_frm_num)) {
printf("in_frm_num= %d, mdl_frm_num= %d\n", in_frm_num, mdl_frm_num);
return dis_err;
} else {
// 计算约束平行四边形顶点值
X1 = (2*mdl_frm_num-in_frm_num)/3;
X2 = (4*in_frm_num-2*mdl_frm_num)/3;
in1 = ftr_in1->mfcc_dat;
in2 = ftr_in2->mfcc_dat;
mdl = ftr_mdl->mfcc_dat;
dis = get_dis(in1, in2);
get_mean(in1, in2, mdl);
x = 1;
y = 1;
step = 1;
do {
//up = (dtw_limit(x, y+1) == ins)?get_dis(in2+mfcc_num, in1):dis_err;
d_right_up = (dtw_limit(x+1, y+2) == ins)?get_dis(in2+mfcc_num+mfcc_num, in1+mfcc_num):dis_err;
right = (dtw_limit(x+1, y) == ins)?get_dis(in2, in1+mfcc_num):dis_err;
right_up = (dtw_limit(x+1, y+1) == ins)?get_dis(in2+mfcc_num, in1+mfcc_num):dis_err;
min = right_up;
if (min > right)
min = right;
if (min > d_right_up)
min = d_right_up;
dis += min;
if (min == right_up) {
in1 += mfcc_num;
x++;
in2 += mfcc_num;
y++;
} else if (min == d_right_up) {
//in2 += mfcc_num;
//y++;
in2 = in2 + mfcc_num + mfcc_num;
y += 2;
in1 += mfcc_num;
x++;
} else {
in1 += mfcc_num;
x++;
}
step++;
mdl += mfcc_num;
get_mean(in1, in2, mdl);
// printf("x=%d y=%d\r\n", x, y);
} while ((x < in_frm_num) && (y < mdl_frm_num));
printf("step=%d\r\n", step);
ftr_mdl->frm_num = step;
}
return (dis/step); //步长归一化
}

View File

@@ -0,0 +1,20 @@
#ifndef _DTW_H
#define _DTW_H
#include "g_def.h"
#define dis_err 0xFFFFFFFF
#define dis_max 0xFFFFFFFF
#ifdef __cplusplus
extern "C" {
#endif
u32 dtw(v_ftr_tag *ftr_in, v_ftr_tag *frt_mdl);
u32 get_mdl(v_ftr_tag *ftr_in1, v_ftr_tag *ftr_in2, v_ftr_tag *ftr_mdl);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,37 @@
#include <stdio.h>
#include "FIR.h"
const double Gains[39] = {
-0.01152483856464, -0.0225638929287, -0.002300804134165, 0.01259363093284,
-0.0003113695305127, -0.01591961043518, 0.005297252625722, 0.01846679452201,
-0.01074864266549, -0.02187510941784, 0.0192688121099, 0.02485021814652,
-0.03198920030056, -0.02754770718169, 0.05351978821001, 0.0296026126381,
-0.09977798297944, -0.03089664322888, 0.3161729116138, 0.5313543998522,
0.3161729116138, -0.03089664322888, -0.09977798297944, 0.0296026126381,
0.05351978821001, -0.02754770718169, -0.03198920030056, 0.02485021814652,
0.0192688121099, -0.02187510941784, -0.01074864266549, 0.01846679452201,
0.005297252625722, -0.01591961043518, -0.0003113695305127, 0.01259363093284,
-0.002300804134165, -0.0225638929287, -0.01152483856464
};
#define ORDER 38 //阶数
double Buffer[ORDER + 1]; //采样历史数据
double Fir(double Input)
{
double Output = 0.0; //数据输出
int Index; //下标索引
//采样数据移位
for (Index = ORDER; Index > 0; Index--)
Buffer[Index] = Buffer[Index-1];
Buffer[0] = Input;
//计算输出
for (Index = 0; Index < ORDER+1; Index++)
Output += Gains[Index]*Buffer[Index];
return Output;
}

View File

@@ -0,0 +1,15 @@
#ifndef _FIR_H_
#define _FIR_H_
#ifdef __cplusplus
extern "C" {
#endif
double Fir(double Input);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,273 @@
/******* MFCC.C *******/
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "g_def.h"
#include <math.h>
#include "ADC.h"
#include "VAD.h"
#include "MFCC.h"
#include "MFCC_Arg.h"
#include <float.h>
#include "sysctl.h"
#include "dmac.h"
#include "fft.h"
#include "FIR.h"
void cr4_fft_1024_stm32(void *pssOUT, void *pssIN, u16 Nbin);
void normalize(s16 *mfcc_p, u16 frm_num);
u32 fft_out[mfcc_fft_point];
s16 fft_in[mfcc_fft_point];
extern uint64_t fft_out_data[512 / 2];
extern void fft_dma_init(void);
extern volatile fft_t *const fft;
void fft_input_intdata(int16_t *data, uint8_t point)
{
uint16_t point_num = 0;
uint16_t i;
fft_data_t input_data;
if (point == 0)
point_num = 512;
else if (point == 1)
point_num = 256;
else if (point == 2)
point_num = 128;
else if (point == 3)
point_num = 64;
point_num = point_num / 2; // one time send two data
for (i = 0; i < point_num; i++) {
input_data.R1 = data[2 * i];
input_data.I1 = 0;
input_data.R2 = data[2 * i + 1];
input_data.I2 = 0;
fft->fft_input_fifo.fft_input_fifo = *(uint64_t *)&input_data;
}
}
void fft_sync_data(int16_t *data, uint8_t point, fft_data_t *fft_data)
{
uint16_t point_num = 0;
uint16_t i;
if (point == 0)
point_num = 512;
else if (point == 1)
point_num = 256;
else if (point == 2)
point_num = 128;
else if (point == 3)
point_num = 64;
point_num = point_num / 2; // one time send two data
for (i = 0; i < point_num; i++) {
(fft_data + i)->R1 = data[2 * i];
(fft_data + i)->I1 = 0;
(fft_data + i)->R2 = data[2 * i + 1];
(fft_data + i)->I2 = 0;
}
}
/*
* cr4_fft_1024_stm32输入参数是有符号数
*
* cr4_fft_1024_stm32输入参数包括实数和虚数
但语音数据只包括实数部分 虚数用0填充
fft点数超出输入数据长度时 超过部分用0填充
cr4_fft_1024_stm32输出数据包括实数和虚数
应该取其绝对值 即平方和的根
*/
u32 *mfcc_fft(s16 *dat_buf, u16 buf_len)
{
u16 i;
s32 real, imag;
fft_data_t output_data;
if (buf_len > mfcc_fft_point)
return (void *)0;
for (i = 0; i < buf_len; i++) {
fft_in[i] = *(dat_buf+i);//虚部高位 实部低位
}
for (; i < mfcc_fft_point; i++)
fft_in[i] = 0;//超出部分用0填充
fft_data_t fft_in_buf[512];
memset(fft_in_buf, 0 , sizeof(fft_in_buf));
fft_sync_data(fft_in, FFT_512, fft_in_buf);
fft_complex_uint16_dma(DMAC_CHANNEL0, DMAC_CHANNEL1, 0x1ff, FFT_DIR_FORWARD, (uint64_t *)fft_in_buf, 512, fft_out_data);
for (i = 0; i < frq_max / 2; i++) {
output_data = *(fft_data_t *)&fft_out_data[i];
imag = (s16)output_data.I1;
real = (s16)output_data.R1;
real = real*real+imag*imag;
fft_out[2 * i] = sqrtf((float)real)*10;
imag = (s16)output_data.I2;
real = (s16)output_data.R2;
real = real*real+imag*imag;
fft_out[2 * i + 1] = sqrtf((float)real)*10;
}
return fft_out;
}
/* MFCCMel频率倒谱系数
*
参数:
valid 有效语音段起点终点
返回值:
v_ftr MFCC值帧数
Mel=2595*lg(1+f/700)
1000Hz以下按线性刻度 1000Hz以上按对数刻度
三角型滤波器中心频率 在Mel频率刻度上等间距排列
预加重:6dB/倍频程 一阶高通滤波器 H(z)=1-uz^(-1) y(n)=x(n)-ux(n-1) u=0.94~0.97
MFCC 步骤:
1.对语音信号预加重、分帧、加汉明窗处理,然后进行短时傅里叶变换,得出频谱
2.取频谱平方得能量谱。并用24个Mel带通滤波器进行滤波输出Mel功率谱
3.对每个滤波器的输出值取对数得到相应频带的对数功率谱。然后对24个对数功率进行
反离散余弦变换得到12个MFCC系数
*/
void get_mfcc(valid_tag *valid, v_ftr_tag *v_ftr, atap_tag *atap_arg)
{
u16 *vc_dat;
u16 h, i;
u32 *frq_spct; //频谱
s16 vc_temp[FRAME_LEN]; //语音暂存区
s32 temp;
u32 pow_spct[tri_num]; //三角滤波器输出对数功率谱
u16 frm_con;
s16 *mfcc_p;
s8 *dct_p;
s32 mid;
u16 v_frm_num;
//USART1_printf("start=%d end=%d",(u32)(valid->start),(u32)(valid->end));
v_frm_num = (((u32)(valid->end)-(u32)(valid->start))/2-FRAME_LEN)/(FRAME_LEN-frame_mov)+1;
if (v_frm_num > vv_frm_max) {
printf("frm_num=%d ", v_frm_num);
v_ftr->frm_num = 0;
} else {
mid = (s32)atap_arg->mid_val;
mfcc_p = v_ftr->mfcc_dat;
frm_con = 0;
//low pass filter
// for (vc_dat = (u16 *)(valid->start); vc_dat <= ((u16 *)(valid->end-FRAME_LEN)); vc_dat += 1) {
// *vc_dat = (u16)(Fir(*vc_dat));
// }
for (vc_dat = (u16 *)(valid->start); vc_dat <= ((u16 *)(valid->end-FRAME_LEN)); vc_dat += (FRAME_LEN-frame_mov)) {
for (i = 0; i < FRAME_LEN; i++) {
//预加重
// printf("vc_dat[%d]=%d ",i,((s32)(*(vc_dat+i))-mid));
temp = ((s32)(*(vc_dat+i))-mid) - hp_ratio(((s32)(*(vc_dat+i-1))-mid));
// printf("vc_hp[%d]=%d ",i,temp);
//加汉明窗 并放大10倍
vc_temp[i] = (s16)(temp*hamm[i]/(hamm_top/10));
// printf("vc_hm[%d]=%d\r\n",i,vc_temp[i]);
}
frq_spct = mfcc_fft(vc_temp, FRAME_LEN);
for (i = 0; i < frq_max; i++) {
//printf("frq_spct[%d]=%d ",i,frq_spct[i]);
frq_spct[i] *= frq_spct[i];//能量谱
//printf("E_spct[%d]=%d\r\n",i,frq_spct[i]);
}
//加三角滤波器
pow_spct[0] = 0;
for (i = 0; i < tri_cen[1]; i++)
pow_spct[0] += (frq_spct[i]*tri_even[i]/(tri_top/10));
for (h = 2; h < tri_num; h += 2) {
pow_spct[h] = 0;
for (i = tri_cen[h-1]; i < tri_cen[h+1]; i++)
pow_spct[h] += (frq_spct[i]*tri_even[i]/(tri_top/10));
}
for (h = 1; h < (tri_num-2); h += 2) {
pow_spct[h] = 0;
for (i = tri_cen[h-1]; i < tri_cen[h+1]; i++)
pow_spct[h] += (frq_spct[i]*tri_odd[i]/(tri_top/10));
}
pow_spct[tri_num-1] = 0;
for (i = tri_cen[tri_num-2]; i < (mfcc_fft_point/2); i++)
pow_spct[tri_num-1] += (frq_spct[i]*tri_odd[i]/(tri_top/10));
//三角滤波器输出取对数
for (h = 0; h < tri_num; h++) {
//USART1_printf("pow_spct[%d]=%d ",h,pow_spct[h]);
pow_spct[h] = (u32)(log(pow_spct[h])*100);//取对数后 乘100 提升数据有效位数
//USART1_printf("%d\r\n",pow_spct[h]);
}
//反离散余弦变换
dct_p = (s8 *)dct_arg;
for (h = 0; h < mfcc_num; h++) {
mfcc_p[h] = 0;
for (i = 0; i < tri_num; i++)
mfcc_p[h] += (((s32)pow_spct[i])*((s32)dct_p[i])/100);
//printf("%d,",mfcc_p[h]);
dct_p += tri_num;
}
//USART1_printf("\r\n");
mfcc_p += mfcc_num;
frm_con++;
}
mfcc_p = v_ftr->mfcc_dat;
normalize(mfcc_p, frm_con);
v_ftr->frm_num = frm_con;
}
}
s16 avg(s16 *mfcc_p, u16 frm_num)
{
int i, j;
double sum = 0.0f;
printf("frm_num = %d, mfcc_num = %d\n", frm_num, mfcc_num);
for (i = 0; i < frm_num; i++)
for (j = 0; j < mfcc_num; j++) {
sum += mfcc_p[i * mfcc_num + j];
// printf("[%d, %d]%f %f ", i, j, sum, mfcc_p[i * mfcc_num + j]);
}
return (s16)(sum / (frm_num * mfcc_num));
}
s16 stdev(s16 *mfcc_p, s16 avg1, u16 frm_num)
{
int i, j;
double sum = 0.0f;
for (i = 0; i < frm_num; i++)
for (j = 0; j < mfcc_num; j++)
sum += (mfcc_p[i * mfcc_num + j] - avg1) * (mfcc_p[i * mfcc_num + j] - avg1);
return (s16)(sqrt(sum / (frm_num * mfcc_num)));
}
void normalize(s16 *mfcc_p, u16 frm_num)
{
int i, j;
s16 avg1 = avg(mfcc_p, frm_num);
s16 stdev1 = stdev(mfcc_p, avg1, frm_num);
printf("avg1 = %d, stdev1 = %d\n", avg1, stdev1);
for (i = 0; i < frm_num; i++)
for (j = 0; j < mfcc_num; j++)
mfcc_p[i * mfcc_num + j] = (mfcc_p[i * mfcc_num + j] - avg1) * 100 / stdev1;
}

View File

@@ -0,0 +1,39 @@
#ifndef _MFCC_H
#define _MFCC_H
#include "VAD.h"
#include "g_def.h"
#define hp_ratio(x) (x*95/100)//预加重系数 0.95
#define mfcc_fft_point 512 //FFT点数
#define frq_max (mfcc_fft_point/2) //最大频率
#define hamm_top 10000 //汉明窗最大值
#define tri_top 1000 //三角滤波器顶点值
#define tri_num 24 //三角滤波器个数
//#define tri_num 17 //三角滤波器个数
#define mfcc_num 12 //MFCC阶数
#define vv_tim_max 2200 //单段有效语音最长时间 ms
#define vv_frm_max ((vv_tim_max-frame_time)/(frame_time-frame_mov_t)+1) //单段有效语音最长帧数
#ifdef __cplusplus
extern "C" {
#endif
//#pragma pack(4)
typedef struct {
u16 save_sign; //存储标记 用于判断flash中特征模板是否有效
u16 frm_num; //帧数
// u8 word_num;
s16 mfcc_dat[vv_frm_max*mfcc_num]; //MFCC转换结果
// float mfcc_dat[vv_frm_max*mfcc_num];
} v_ftr_tag; //语音特征结构体
//#pragma pack()
void get_mfcc(valid_tag *valid, v_ftr_tag *v_ftr, atap_tag *atap_arg);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,135 @@
#ifndef MFCC_Arg
#define MFCC_Arg
#include "g_def.h"
//汉明窗 窗长160
const u16 hamm[] = {
800, 804, 814, 832, 857, 889, 929, 975, 1028, 1088, 1155, 1228, 1308, 1394, 1486, 1585, 1689, 1800, 1915, 2037,
2163, 2295, 2432, 2573, 2718, 2868, 3022, 3179, 3340, 3504, 3671, 3841, 4013, 4187, 4364, 4542, 4721, 4901, 5082,
5264, 5445, 5627, 5808, 5989, 6169, 6348, 6525, 6700, 6873, 7044, 7213, 7378, 7541, 7700, 7856, 8007, 8155, 8298,
8437, 8571, 8701, 8825, 8943, 9056, 9164, 9265, 9361, 9450, 9533, 9610, 9680, 9743, 9799, 9849, 9892, 9927, 9956,
9978, 9992, 9999, 9999, 9992, 9978, 9956, 9927, 9892, 9849, 9799, 9743, 9680, 9610, 9533, 9450, 9361, 9265, 9164,
9056, 8943, 8825, 8701, 8571, 8437, 8298, 8155, 8007, 7856, 7700, 7541, 7378, 7213, 7044, 6873, 6700, 6525, 6348,
6169, 5989, 5808, 5627, 5445, 5264, 5082, 4901, 4721, 4542, 4364, 4187, 4013, 3841, 3671, 3504, 3340, 3179, 3022,
2868, 2718, 2573, 2432, 2295, 2163, 2037, 1915, 1800, 1689, 1585, 1486, 1394, 1308, 1228, 1155, 1088, 1028, 975,
929, 889, 857, 832, 814, 804, 800
};
#if 0 //1024 point fft
//三角滤波器中心频率点
const u16 tri_cen[] = {
11, 22, 33, 44, 55, 66, 77, 88, 99, 110, 121, 134, 152, 171, 191, 214, 237, 263, 291, 321, 354, 389, 427, 468
};
//奇数三角滤波器首尾相连的折线
const u16 tri_odd[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 91, 182, 273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545,
455, 364, 273, 182, 91, 0, 91, 182, 273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455,
364, 273, 182, 91, 0, 91, 182, 273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364,
273, 182, 91, 0, 91, 182, 273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364, 273,
182, 91, 0, 91, 182, 273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364, 273, 182,
91, 0, 77, 154, 231, 308, 385, 462, 538, 615, 692, 769, 846, 923, 1000, 944, 889, 833, 778, 722, 667, 611, 556,
500, 444, 389, 333, 278, 222, 167, 111, 56, 0, 53, 105, 158, 211, 263, 316, 368, 421, 474, 526, 579, 632, 684,
737, 789, 842, 895, 947, 1000, 950, 900, 850, 800, 750, 700, 650, 600, 550, 500, 450, 400, 350, 300, 250, 200,
150, 100, 50, 0, 43, 87, 130, 174, 217, 261, 304, 348, 391, 435, 478, 522, 565, 609, 652, 696, 739, 783, 826,
870, 913, 957, 1000, 957, 913, 870, 826, 783, 739, 696, 652, 609, 565, 522, 478, 435, 391, 348, 304, 261, 217,
174, 130, 87, 43, 0, 38, 77, 115, 154, 192, 231, 269, 308, 346, 385, 423, 462, 500, 538, 577, 615, 654, 692,
731, 769, 808, 846, 885, 923, 962, 1000, 964, 929, 893, 857, 821, 786, 750, 714, 679, 643, 607, 571, 536, 500,
464, 429, 393, 357, 321, 286, 250, 214, 179, 143, 107, 71, 36, 0, 33, 67, 100, 133, 167, 200, 233, 267, 300,
333, 367, 400, 433, 467, 500, 533, 567, 600, 633, 667, 700, 733, 767, 800, 833, 867, 900, 933, 967, 1000, 970,
939, 909, 879, 848, 818, 788, 758, 727, 697, 667, 636, 606, 576, 545, 515, 485, 455, 424, 394, 364, 333, 303,
273, 242, 212, 182, 152, 121, 91, 61, 30, 0, 29, 57, 86, 114, 143, 171, 200, 229, 257, 286, 314, 343, 371, 400,
429, 457, 486, 514, 543, 571, 600, 629, 657, 686, 714, 743, 771, 800, 829, 857, 886, 914, 943, 971, 1000, 974,
947, 921, 895, 868, 842, 816, 789, 763, 737, 711, 684, 658, 632, 605, 579, 553, 526, 500, 474, 447, 421, 395,
368, 342, 316, 289, 263, 237, 211, 184, 158, 132, 105, 79, 53, 26, 0, 24, 49, 73, 98, 122, 146, 171, 195, 220,
244, 268, 293, 317, 341, 366, 390, 415, 439, 463, 488, 512, 537, 561, 585, 610, 634, 659, 683, 707, 732, 756,
780, 805, 829, 854, 878, 902, 927, 951, 976, 1000, 977, 955, 932, 909, 886, 864, 841, 818, 795, 773, 750, 727,
705, 682, 659, 636, 614, 591, 568, 545, 523, 500, 477, 455, 432, 409, 386, 364, 341, 318, 295, 273, 250, 227,
205, 182, 159, 136, 114, 91, 68, 45, 23, 0
};
//偶数三角滤波器首尾相连的折线
const u16 tri_even[] = {
91, 182, 273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364, 273, 182, 91, 0, 91,
182, 273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364, 273, 182, 91, 0, 91, 182,
273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364, 273, 182, 91, 0, 91, 182, 273,
364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364, 273, 182, 91, 0, 91, 182, 273, 364,
455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364, 273, 182, 91, 0, 91, 182, 273, 364, 455,
545, 636, 727, 818, 909, 1000, 923, 846, 769, 692, 615, 538, 462, 385, 308, 231, 154, 77, 0, 56, 111, 167, 222,
278, 333, 389, 444, 500, 556, 611, 667, 722, 778, 833, 889, 944, 1000, 947, 895, 842, 789, 737, 684, 632, 579,
526, 474, 421, 368, 316, 263, 211, 158, 105, 53, 0, 50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600,
650, 700, 750, 800, 850, 900, 950, 1000, 957, 913, 870, 826, 783, 739, 696, 652, 609, 565, 522, 478, 435, 391,
348, 304, 261, 217, 174, 130, 87, 43, 0, 43, 87, 130, 174, 217, 261, 304, 348, 391, 435, 478, 522, 565, 609, 652,
696, 739, 783, 826, 870, 913, 957, 1000, 962, 923, 885, 846, 808, 769, 731, 692, 654, 615, 577, 538, 500, 462,
423, 385, 346, 308, 269, 231, 192, 154, 115, 77, 38, 0, 36, 71, 107, 143, 179, 214, 250, 286, 321, 357, 393, 429,
464, 500, 536, 571, 607, 643, 679, 714, 750, 786, 821, 857, 893, 929, 964, 1000, 967, 933, 900, 867, 833, 800, 767,
733, 700, 667, 633, 600, 567, 533, 500, 467, 433, 400, 367, 333, 300, 267, 233, 200, 167, 133, 100, 67, 33, 0, 30,
61, 91, 121, 152, 182, 212, 242, 273, 303, 333, 364, 394, 424, 455, 485, 515, 545, 576, 606, 636, 667, 697, 727,
758, 788, 818, 848, 879, 909, 939, 970, 1000, 971, 943, 914, 886, 857, 829, 800, 771, 743, 714, 686, 657, 629, 600,
571, 543, 514, 486, 457, 429, 400, 371, 343, 314, 286, 257, 229, 200, 171, 143, 114, 86, 57, 29, 0, 26, 53, 79,
105, 132, 158, 184, 211, 237, 263, 289, 316, 342, 368, 395, 421, 447, 474, 500, 526, 553, 579, 605, 632, 658, 684,
711, 737, 763, 789, 816, 842, 868, 895, 921, 947, 974, 1000, 976, 951, 927, 902, 878, 854, 829, 805, 780, 756, 732,
707, 683, 659, 634, 610, 585, 561, 537, 512, 488, 463, 439, 415, 390, 366, 341, 317, 293, 268, 244, 220, 195, 171,
146, 122, 98, 73, 49, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
#endif
#if 1 //512 point fft
//三角滤波器中心频率点
const u16 tri_cen[] = {
4, 7, 12, 16, 21, 26, 32, 38, 44, 51, 59, 67, 76, 85, 96, 107, 119, 132, 146, 161, 177, 195, 213, 234
};
//奇数三角滤波器首尾相连的折线
const u16 tri_odd[] = {
0, 0, 0, 0, 333, 667, 1000, 800, 600, 400, 200, 0, 250, 500, 750, 1000, 800, 600, 400, 200, 0, 200, 400, 600, 800, 1000, 833, 667, 500, 333,
167, 0, 167, 333, 500, 667, 833, 1000, 833, 667, 500, 333, 167, 0, 143, 286, 429, 571, 714, 857, 1000, 875, 750, 625, 500, 375, 250, 125, 0,
125, 250, 375, 500, 625, 750, 875, 1000, 889, 778, 667, 556, 444, 333, 222, 111, 0, 111, 222, 333, 444, 556, 667, 778, 889, 1000, 909, 818,
727, 636, 545, 455, 364, 273, 182, 91, 0, 91, 182, 273, 364, 455, 545, 636, 727, 818, 909, 1000, 917, 833, 750, 667, 583, 500, 417, 333, 250,
167, 83, 0, 77, 154, 231, 308, 385, 462, 538, 615, 692, 769, 846, 923, 1000, 929, 857, 786, 714, 643, 571, 500, 429, 357, 286, 214, 143, 71, 0,
67, 133, 200, 267, 333, 400, 467, 533, 600, 667, 733, 800, 867, 933, 1000, 938, 875, 813, 750, 688, 625, 563, 500, 438, 375, 313, 250, 188,
125, 63, 0, 56, 111, 167, 222, 278, 333, 389, 444, 500, 556, 611, 667, 722, 778, 833, 889, 944, 1000, 944, 889, 833, 778, 722, 667, 611, 556,
500, 444, 389, 333, 278, 222, 167, 111, 56, 0, 48, 95, 143, 190, 238, 286, 333, 381, 429, 476, 524, 571, 619, 667, 714, 762, 810, 857, 905, 952,
1000, 955, 909, 864, 818, 773, 727, 682, 636, 591, 545, 500, 455, 409, 364, 318, 273, 227, 182, 136, 91, 45, 0
};
//偶数三角滤波器首尾相连的折线
const u16 tri_even[] = {
250, 500, 750, 1000, 667, 334, 0, 200, 400, 600, 800, 1000, 750, 500, 250, 0, 200, 400, 600, 800, 1000, 800, 600, 400, 200, 0, 167, 333, 500,
667, 833, 1000, 833, 667, 500, 333, 167, 0, 167, 333, 500, 667, 833, 1000, 857, 714, 571, 429, 286, 143, 0, 125, 250, 375, 500, 625, 750, 875,
1000, 875, 750, 625, 500, 375, 250, 125, 0, 111, 222, 333, 444, 556, 667, 778, 889, 1000, 889, 778, 667, 556, 444, 333, 222, 111, 0, 91, 182,
273, 364, 455, 545, 636, 727, 818, 909, 1000, 909, 818, 727, 636, 545, 455, 364, 273, 182, 91, 0, 83, 167, 250, 333, 417, 500, 583, 667, 750,
833, 917, 1000, 923, 846, 769, 692, 615, 538, 462, 385, 308, 231, 154, 77, 0, 71, 143, 214, 286, 357, 429, 500, 571, 643, 714, 786, 857, 929,
1000, 933, 867, 800, 733, 667, 600, 533, 467, 400, 333, 267, 200, 133, 67, 0, 63, 125, 188, 250, 313, 375, 438, 500, 563, 625, 688, 750, 813,
875, 938, 1000, 944, 889, 833, 778, 722, 667, 611, 556, 500, 444, 389, 333, 278, 222, 167, 111, 56, 0, 56, 111, 167, 222, 278, 333, 389, 444,
500, 556, 611, 667, 722, 778, 833, 889, 944, 1000, 952, 905, 857, 810, 762, 714, 667, 619, 571, 524, 476, 429, 381, 333, 286, 238, 190, 143,
95, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
#endif
// 反离散余弦变换 余弦表 24*12
const s8 dct_arg[] = {
100, 98, 95, 90, 83, 75, 66, 56, 44, 32, 20, 7, -7, -20, -32, -44, -56, -66, -75, -83, -90, -95, -98, -100,
99, 92, 79, 61, 38, 13, -13, -38, -61, -79, -92, -99, -99, -92, -79, -61, -38, -13, 13, 38, 61, 79, 92, 99,
98, 83, 56, 20, -20, -56, -83, -98, -98, -83, -56, -20, 20, 56, 83, 98, 98, 83, 56, 20, -20, -56, -83, -98,
97, 71, 26, -26, -71, -97, -97, -71, -26, 26, 71, 97, 97, 71, 26, -26, -71, -97, -97, -71, -26, 26, 71, 97,
95, 56, -7, -66, -98, -90, -44, 20, 75, 100, 83, 32, -32, -83, -100, -75, -20, 44, 90, 98, 66, 7, -56, -95,
92, 38, -38, -92, -92, -38, 38, 92, 92, 38, -38, -92, -92, -38, 38, 92, 92, 38, -38, -92, -92, -38, 38, 92,
90, 20, -66, -100, -56, 32, 95, 83, 7, -75, -98, -44, 44, 98, 75, -7, -83, -95, -32, 56, 100, 66, -20, -90,
87, 0, -87, -87, 0, 87, 87, 0, -87, -87, 0, 87, 87, 0, -87, -87, 0, 87, 87, 0, -87, -87, 0, 87,
83, -20, -98, -56, 56, 98, 20, -83, -83, 20, 98, 56, -56, -98, -20, 83, 83, -20, -98, -56, 56, 98, 20, -83,
79, -38, -99, -13, 92, 61, -61, -92, 13, 99, 38, -79, -79, 38, 99, 13, -92, -61, 61, 92, -13, -99, -38, 79,
75, -56, -90, 32, 98, -7, -100, -20, 95, 44, -83, -66, 66, 83, -44, -95, 20, 100, 7, -98, -32, 90, 56, -75,
71, -71, -71, 71, 71, -71, -71, 71, 71, -71, -71, 71, 71, -71, -71, 71, 71, -71, -71, 71, 71, -71, -71, 71,
};
#endif

View File

@@ -0,0 +1,333 @@
/******** VAD.C *******/
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include "g_def.h"
#include "ADC.h"
#include "VAD.h"
#define n_thl_ratio 1 //噪声门限系数 n_thl=n_max_mean*n_thl_ratio
#define s_thl_ratio(x) (x*30/10) //短时幅度判决门限系数 s_thl=sum_mean*s_thl_ratio
#define z_thl_ratio(x) (x*2/160) //短时过零率 判决门限系数 常数
#define atap_frm_t 30 //背景噪音自适应时间帧长度 ms
#define atap_frm_len ((fs/1000)*atap_frm_t) //背景噪音自适应帧长度
u16 vad_data[VcBuf_Len];
u32 frm_n;
/* 求取自适应参数
* noise :噪声起始点
n_len :噪声长度
atap ;自适应参数
*/
void noise_atap(const u16 *noise, u16 n_len, atap_tag *atap)
{
u32 h, i;
u32 n_max;
u32 max_sum;//每一帧噪声最大值 累加取平均 求噪声阈值
u32 n_sum; //所有数值之和 求平均值 确定零(中)值
u32 mid; //中值
u32 abs; //绝对值
u32 abs_sum;//绝对值和
u32 frm_num;
if ((n_len%atap_frm_len) != 0) //参数检查
return;
frm_num = n_len/atap_frm_len;
n_sum = 0;
max_sum = 0;
for (i = 0; i < n_len; i++)
n_sum += *(noise+i);
mid = n_sum/i;
abs_sum = 0;
for (i = 0; i < n_len; i += atap_frm_len) {
n_max = 0;
for (h = 0; h < atap_frm_len; h++) {
abs = (*(noise+i+h) > mid)?(*(noise+i+h)-mid):(mid-*(noise+i+h));
if (abs > n_max) ////取每帧最大绝对值
n_max = abs;
abs_sum += abs;
}
max_sum += n_max;
//USART1_printf("n_max=%d ", n_max);
//USART1_printf("max_sum=%d\r\n", max_sum);
}
abs_sum /= (n_len/FRAME_LEN);
max_sum /= frm_num;
atap->mid_val = mid;
atap->n_thl = max_sum*n_thl_ratio;
atap->s_thl = s_thl_ratio(abs_sum);
atap->z_thl = z_thl_ratio(FRAME_LEN)/n_thl_ratio;
printf("VAD sum=%d ", atap->s_thl);
printf("VAD zero=%d\n", atap->z_thl);
}
#define v_durmin_t 200 //有效语音最短时间门限 ms
#define v_durmin_f v_durmin_t/(frame_time-frame_mov_t) //有效语音最短帧数
#define s_durmax_t 210 //无声段最长时间门限 ms
#define s_durmax_f s_durmax_t/(frame_time-frame_mov_t)//无声段最长帧数
/*******
* VAD (Voice activity detection) 语音活动检测
检测出一段声音中的有效语音 起始点和长度 最多3段语音
短时幅度 短时过零率求取:
短时幅度直接累加
短时过零率改进为过门限率,设置正负两个绝对值相等的门限。
构成门限带,穿过门限带计作过零
端点判决:
1.判断语音起始点,要求能够滤除突发性噪声
突发性噪声可以引起短时能量或过零率的数值很高,但是往往不能维持足够长的时间,
如门窗的开关,物体的碰撞等引起的噪声,这些都可以通过设定最短时间门限来判别。
超过两门限之一或全部,并且持续时间超过有效语音最短时间门限,
返回最开始超过门限的时间点,将其标记为有效语音起始点。
2.判断语音结束点,要求不能丢弃连词中间短暂的有可能被噪声淹没的“寂静段”
同时低于两门限,并且持续时间超过无声最长时间门限,
返回最开始低于门限的时间点,将其标记为有效语音结束点。
*********/
void VAD(const u16 *vc, u16 buf_len, valid_tag *valid_voice, atap_tag *atap_arg)
{
u8 last_sig = 0; // 上次跃出门限带的状态 1:门限带以下2:门限带以上
u8 cur_stus = 0; // 当前语音段状态 0无声段 1前端过渡段 2语音段 3后端过渡段
u16 front_duration = 0;//前端过渡段超过门限值持续帧数
u16 back_duration = 0;//后端过渡段低于门限值持续帧数
u32 h, i;
u32 frm_sum; // 短时绝对值和
u32 frm_zero; // 短时过零(门限)率
u32 a_thl; // 上门限值
u32 b_thl; // 下门限值
u8 valid_con = 0;//语音段计数 最大max_vc_con
u32 frm_con = 0; //帧计数
a_thl = atap_arg->mid_val+atap_arg->n_thl;
b_thl = atap_arg->mid_val-atap_arg->n_thl;
for (i = 0; i < max_vc_con; i++) {
((valid_tag *)(valid_voice+i))->start = (void *)0;
((valid_tag *)(valid_voice+i))->end = (void *)0;
}
for (i = 0; i < (buf_len-FRAME_LEN); i += (FRAME_LEN-frame_mov)) {
frm_con++;
frm_sum = 0;
for (h = 0; h < FRAME_LEN; h++)//短时绝对值和
frm_sum += (*(vc+i+h) > (atap_arg->mid_val))?(*(vc+i+h)-(atap_arg->mid_val)):((atap_arg->mid_val)-*(vc+i+h));
frm_zero = 0;
for (h = 0; h < (FRAME_LEN-1); h++) {//短时过门限率
if (*(vc+i+h) >= a_thl) //大于上门限值
last_sig = 2;
else if (*(vc+i+h) < b_thl) //小于下门限值
last_sig = 1;
if (*(vc+i+h+1) >= a_thl) {
if (last_sig == 1)
frm_zero++;
} else if (*(vc+i+h+1) < b_thl) {
if (last_sig == 2)
frm_zero++;
}
}
//USART1_printf("frm_con=%d ",frm_con);
//USART1_printf("frm_sum=%d ",frm_sum);
//USART1_printf("frm_zero=%d\r\n",frm_zero);
if ((frm_sum > (atap_arg->s_thl)) || (frm_zero > (atap_arg->z_thl))) {
//至少有一个参数超过其门限值
// if(frm_sum>(atap_arg->s_thl))
// printf("frm_sum ok\n");
// else
// printf("frm_zero ok\n");
if (cur_stus == 0) {//如果当前是无声段
cur_stus = 1; //进入前端过渡段
front_duration = 1; //前端过渡段持续帧数置1 第一帧
} else if (cur_stus == 1) {//当前是前端过渡段
front_duration++;
if (front_duration >= v_durmin_f) { //前端过渡段帧数超过最短有效语音帧数
cur_stus = 2; //进入语音段
((valid_tag *)(valid_voice+valid_con))->start = (u16 *)vc+i-((v_durmin_f-1)*(FRAME_LEN-frame_mov));//记录起始帧位置
front_duration = 0; //前端过渡段持续帧数置0
}
} else if (cur_stus == 3) { //如果当前是后端过渡段 两参数都回升到门限值以上
back_duration = 0;
cur_stus = 2; //记为语音段
}
} else {//两参数都在门限值以下
// printf("frm not ok\n");
if (cur_stus == 2) {//当前是语音段
cur_stus = 3;//设为后端过渡段
back_duration = 1; //前端过渡段持续帧数置1 第一帧
} else if (cur_stus == 3) {//当前是后端过渡段
back_duration++;
if (back_duration >= s_durmax_f) { //后端过渡段帧数超过最长无声帧数
cur_stus = 0; //进入无声段
((valid_tag *)(valid_voice+valid_con))->end = (u16 *)vc+i-(s_durmax_f*(FRAME_LEN-frame_mov))+FRAME_LEN;//记录结束帧位置
valid_con++;
if (valid_con == max_vc_con)
return;
back_duration = 0;
}
} else if (cur_stus == 1) {//当前是前端过渡段 两参数都回落到门限值以下
//持续时间低于语音最短时间门限 视为短时噪声
front_duration = 0;
cur_stus = 0; //记为无声段
}
}
}
}
u8 VAD2(const u16 *vc, valid_tag *valid_voice, atap_tag *atap_arg)
{
u8 last_sig = 0; // 上次跃出门限带的状态 1:门限带以下2:门限带以上
static u8 cur_stus; // 当前语音段状态 0无声段 1前端过渡段 2语音段 3后端过渡段
static u16 front_duration;//前端过渡段超过门限值持续帧数
static u16 back_duration;//后端过渡段低于门限值持续帧数
static u8 word_num_tmp;
u32 h, i;
u32 frm_sum; // 短时绝对值和
u32 frm_zero; // 短时过零(门限)率
u32 a_thl; // 上门限值
u32 b_thl; // 下门限值
a_thl = atap_arg->mid_val+atap_arg->n_thl;
b_thl = atap_arg->mid_val-atap_arg->n_thl;
frm_sum = 0;
for (h = 0; h < FRAME_LEN; h++)//短时绝对值和
frm_sum += (*(vc+h) > (atap_arg->mid_val))?(*(vc+h)-(atap_arg->mid_val)):((atap_arg->mid_val)-*(vc+h));
frm_zero = 0;
for (h = 0; h < (FRAME_LEN-1); h++) {//短时过门限率
if (*(vc+h) >= a_thl) //大于上门限值
last_sig = 2;
else if (*(vc+h) < b_thl) //小于下门限值
last_sig = 1;
if (*(vc+h+1) >= a_thl) {
if (last_sig == 1)
frm_zero++;
} else if (*(vc+h+1) < b_thl) {
if (last_sig == 2)
frm_zero++;
}
}
// printf("frm_sum=%d\n",frm_sum);
// printf("frm_zero=%d ",frm_zero);
if (FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n > VcBuf_Len - (FRAME_LEN - frame_mov)) {//over frame length
cur_stus = 0; //进入无声段
frm_n = 0;
printf("I am here\n");
return 0;
}
if ((frm_sum > (atap_arg->s_thl)) || (frm_zero > (atap_arg->z_thl))) {
//至少有一个参数超过其门限值
if (frm_sum > (atap_arg->s_thl))
printf("frm_sum ok\n");
else
printf("frm_zero ok\n");
if (cur_stus == 0) {//如果当前是无声段
cur_stus = 1; //进入前端过渡段
front_duration = 1; //前端过渡段持续帧数置1 第一帧
frm_n = 0;
word_num_tmp = 1;
for (i = 0; i < FRAME_LEN; i++) //copy the valid data
vad_data[i] = vc[i];
} else if (cur_stus == 1) { //当前是前端过渡段
front_duration++;
if (front_duration >= v_durmin_f) {//前端过渡段帧数超过最短有效语音帧数
cur_stus = 2; //进入语音段
front_duration = 0; //前端过渡段持续帧数置0
valid_voice[0].start = (u16 *)vad_data;
}
for (i = 0; i < FRAME_LEN - frame_mov; i++)//copy the valid data
vad_data[FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n + i] = vc[i + frame_mov];
frm_n++;
if (FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n > VcBuf_Len + frame_mov - FRAME_LEN) {
cur_stus = 0; //进入无声段
valid_voice[0].end = (u16 *)vad_data + VcBuf_Len;//记录结束帧位置
// valid_voice[0].word_num = word_num_tmp;
return 1;
}
} else if (cur_stus == 3) { //如果当前是后端过渡段 两参数都回升到门限值以上
if (back_duration > 5)
word_num_tmp++;
// printf("back_duration = %d\n", back_duration);
back_duration = 0;
cur_stus = 2; //记为语音段
for (i = 0; i < FRAME_LEN - frame_mov; i++)//copy the valid data
vad_data[FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n + i] = vc[i + frame_mov];
frm_n++;
if (FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n > VcBuf_Len + frame_mov - FRAME_LEN) {
cur_stus = 0; //进入无声段
valid_voice[0].end = (u16 *)vad_data + VcBuf_Len;//记录结束帧位置
// valid_voice[0].word_num = word_num_tmp;
return 1;
}
} else if (cur_stus == 2) {
for (i = 0; i < FRAME_LEN - frame_mov; i++)//copy the valid data
vad_data[FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n + i] = vc[i + frame_mov];
frm_n++;
if (FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n > VcBuf_Len + frame_mov - FRAME_LEN) {
cur_stus = 0; //进入无声段
valid_voice[0].end = (u16 *)vad_data + VcBuf_Len;//记录结束帧位置
// valid_voice[0].word_num = word_num_tmp;
return 1;
}
}
} else {//两参数都在门限值以下
// printf("frm error\n");
if (cur_stus == 2) {//当前是语音段
cur_stus = 3;//设为后端过渡段
back_duration = 1; //前端过渡段持续帧数置1 第一帧
#if 0
for (i = 0; i < FRAME_LEN - frame_mov; i++)//copy the valid data
vad_data[FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n + i] = vc[i + frame_mov];
frm_n++;
if (FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n > VcBuf_Len + frame_mov - FRAME_LEN) {
cur_stus = 0; //进入无声段
valid_voice[0].end = (u16 *)vad_data + VcBuf_Len;//记录结束帧位置
// valid_voice[0].word_num = word_num_tmp;
return 1;
}
#endif
} else if (cur_stus == 3) {//当前是后端过渡段
back_duration++;
#if 0
for (i = 0; i < FRAME_LEN - frame_mov; i++)//copy the valid data
vad_data[FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n + i] = vc[i + frame_mov];
frm_n++;
if (FRAME_LEN + (FRAME_LEN-frame_mov) * frm_n > VcBuf_Len + frame_mov - FRAME_LEN) {
cur_stus = 0; //进入无声段
valid_voice[0].end = (u16 *)vad_data + VcBuf_Len;//记录结束帧位置
// valid_voice[0].word_num = word_num_tmp;
return 1;
}
#endif
if (back_duration >= s_durmax_f) {//后端过渡段帧数超过最长无声帧数
cur_stus = 0; //进入无声段
back_duration = 0;
frm_n = frm_n - s_durmax_f;
valid_voice[0].end = (u16 *)vad_data+frm_n*(FRAME_LEN-frame_mov)+FRAME_LEN;//记录结束帧位置
// valid_voice[0].word_num = word_num_tmp;
return 1;
}
} else if (cur_stus == 1) {//当前是前端过渡段 两参数都回落到门限值以下
//持续时间低于语音最短时间门限 视为短时噪声
front_duration = 0;
cur_stus = 0; //记为无声段
frm_n = 0;
}
}
return 0;
}

View File

@@ -0,0 +1,42 @@
#ifndef _VAD_H
#define _VAD_H
#include "g_def.h"
#define max_vc_con 3 //VAD最多检测的语音段数
#define frame_time 20 // 每帧时间长度 单位ms
#define frame_mov_t 10 // 帧移
#define FRAME_LEN (frame_time*fs/1000) // 帧长
#define frame_mov (frame_mov_t *fs/1000) // 帧移,相邻帧交叠部分
#ifdef __cplusplus
extern "C" {
#endif
#pragma pack(1)
typedef struct {
u32 mid_val; //语音段中值 相当于有符号的0值 用于短时过零率计算
u16 n_thl; //噪声阈值,用于短时过零率计算
u16 z_thl; //短时过零率阈值,超过此阈值,视为进入过渡段。
u32 s_thl; //短时累加和阈值,超过此阈值,视为进入过渡段。
} atap_tag; //自适应参数
#pragma pack()
//#pragma pack(4)
typedef struct {
u16 *start; //起始点
u16 *end; //结束点
// u8 word_num;
} valid_tag; //有效语音段
//#pragma pack()
void noise_atap(const u16 *noise, u16 n_len, atap_tag *atap);
void VAD(const u16 *vc, u16 buf_len, valid_tag *valid_voice, atap_tag *atap_arg);
u8 VAD2(const u16 *vc, valid_tag *valid_voice, atap_tag *atap_arg);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,105 @@
/* 适用于STM32F103VE
* 全部falsh 512KB 256页 每页2KB
每个语音特征模板占用4KB 采用冗余模板 每个语音指令4个特征模板
初步设计设定20个语音指令 共占用320KB
flash最后320KB用于存储语音特征模板
编译器需设置 以免存储区被代码占用
烧写程序时也不能擦除存储区 选擦除需要的页
*/
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "g_def.h"
#include "flash.h"
#include "uarths.h"
#include "MFCC.h"
//#include "voice_modle.h"
v_ftr_tag ftr_save[20 * 4];
u8 save_ftr_mdl(v_ftr_tag *ftr, u32 addr)
{
// u32 ftr_size;
addr = addr / size_per_ftr;
if (addr > 40) {
printf("flash addr error");
return Flash_Fail;
}
ftr->save_sign = save_mask;
ftr_save[addr] = *ftr;
// ftr_size=2*mfcc_num*ftr->frm_num;
return Flash_Success;
}
#if 0
void init_voice_mdl(void)
{
uint16_t i, j, comm;
for (comm = 0; comm < 4; comm++) {
for (j = 0; j < 4; j++) {
ftr_save[comm*4+j].save_sign = save_mask;
ftr_save[comm*4+j].frm_num = mdl_fram_num[comm*4+j];
// ftr_save[comm*4+j].word_num = 2;
}
}
//
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
ftr_save[0].mfcc_dat[i] = start1[i];
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
ftr_save[1].mfcc_dat[i] = start2[i];
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
ftr_save[2].mfcc_dat[i] = start3[i];
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
ftr_save[3].mfcc_dat[i] = start4[i];
//
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
ftr_save[4].mfcc_dat[i] = pause1[i];
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
ftr_save[5].mfcc_dat[i] = pause2[i];
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
ftr_save[6].mfcc_dat[i] = pause3[i];
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
ftr_save[7].mfcc_dat[i] = pause4[i];
//
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
ftr_save[8].mfcc_dat[i] = cancle1[i];
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
ftr_save[9].mfcc_dat[i] = cancle2[i];
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
ftr_save[10].mfcc_dat[i] = cancle3[i];
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
ftr_save[11].mfcc_dat[i] = cancle4[i];
//
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
ftr_save[12].mfcc_dat[i] = confirm1[i];
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
ftr_save[13].mfcc_dat[i] = confirm2[i];
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
ftr_save[14].mfcc_dat[i] = confirm3[i];
for (i = 0; i < (vv_frm_max * mfcc_num); i++)
ftr_save[15].mfcc_dat[i] = confirm4[i];
}
#endif

View File

@@ -0,0 +1,39 @@
#ifndef __FLASH_H
#define __FLASH_H
#include "g_def.h"
#include "MFCC.h"
#define FLASH_PAGE_SIZE 2048
#define Flash_Fail 3
#define Flash_Success 0
#define save_mask 12345
#define size_per_ftr (4*1024)
#define page_per_ftr (size_per_ftr/FLASH_PAGE_SIZE)
#define ftr_per_comm 4
#define size_per_comm (ftr_per_comm*size_per_ftr)
#define comm_num 10
#define ftr_total_size (size_per_comm*comm_num)
//#define ftr_end_addr 0x8080000
#define ftr_end_addr (size_per_ftr * ftr_per_comm * comm_num)
#define ftr_start_addr 0 //(ftr_end_addr-ftr_total_size)
#ifdef __cplusplus
extern "C" {
#endif
u8 save_ftr_mdl(v_ftr_tag *ftr, u32 addr);
//void init_voice_mdl(void);
extern v_ftr_tag ftr_save[20 * 4];
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,13 @@
#ifndef _G_DEF_H_
#define _G_DEF_H_
typedef uint32_t u32;
typedef uint16_t u16;
typedef uint8_t u8;
typedef int32_t s32;
typedef int16_t s16;
typedef int8_t s8;
#endif