00001
00018 #include "vad.h"
00019 #include <cassert>
00020
00021
00022
00047 bool
00048 vad::Reset (const vad_config & cfg, DOUBLE snr, FLOAT FrameLen, XFFT_WIN WinType)
00049 {
00050
00051 assert ((m_LTWindowLen-1)%2 == 0);
00052 assert ((m_LTWindowLen-1/2) >= m_ActWindowLen);
00053
00054
00055 Destruct ();
00056
00057
00058 m_FrameLen = FrameLen;
00059 m_WinType = WinType;
00060 m_SNR = snr;
00061
00062 m_SNR0 = cfg.SNR0;
00063 m_SNR1 = cfg.SNR1;
00064 m_gamma0 = cfg.gamma0;
00065 m_gamma1 = cfg.gamma1;
00066
00067 m_ActWindowLen = cfg.ActWindowLen;
00068 m_LTWindowLen = cfg.LTWindowLen;
00069 m_Handover = cfg.Handover;
00070 m_offset = cfg.offset;
00071 m_Nalfa = cfg.Nalfa;
00072 m_SNRalfa = cfg.SNRalfa;
00073
00074
00075 m_NumSil = 0;
00076 m_NumVoice = 0;
00077 m_HoSil = 0;
00078 m_NoisePow = 0;
00079 m_SignalPow = 0;
00080 m_NoiseInitFrames = 0;
00081 m_bufferedFrames = 0;
00082
00083
00084 m_fftObject = rfft_construct (FrameLen, FrameLen, FFT_DIRECT, 1, WinType, NULL);
00085 m_Nfft = rfft_getfftnp(m_fftObject)/2;
00086
00087 m_NoiseFFT = (pDOUBLE) xmalloc (m_Nfft*sizeof(DOUBLE));
00088 m_NoiseFFTAct = (pDOUBLE) xmalloc (m_Nfft*sizeof(DOUBLE));
00089 m_LTSE = (pDOUBLE) xmalloc (m_Nfft*sizeof(DOUBLE));
00090
00091 m_memFFT.Resize(m_LTWindowLen, m_Nfft);
00092
00093
00094
00095 for (INT i = 0; i < m_LTWindowLen; ++i)
00096
00097 memset (m_memFFT[i], 0, m_Nfft*sizeof(DOUBLE));
00098
00099
00100 if ( m_fftObject == NULL
00101 || m_NoiseFFT == NULL
00102 || m_NoiseFFTAct == NULL
00103 || m_LTSE == NULL)
00104 {
00105 Destruct ();
00106 return false;
00107 }
00108
00109
00110 memset (m_NoiseFFT, 0, m_Nfft*sizeof(DOUBLE));
00111 memset (m_NoiseFFTAct, 0, m_Nfft*sizeof(DOUBLE));
00112 memset (m_LTSE, 0, m_Nfft*sizeof(DOUBLE));
00113
00114 return true;
00115 }
00116
00129 void
00130 vad::NoiseInit (pDOUBLE frame)
00131 {
00132
00133 rfft_rfft (m_fftObject, frame);
00134 rfft_mag (m_fftObject);
00135 pDOUBLE newFFT = rfft_getrevec (m_fftObject);
00136
00137
00138
00139
00140 DOUBLE rescale = DOUBLE(m_NoiseInitFrames)/DOUBLE(m_NoiseInitFrames+1);
00141 if (m_NoiseInitFrames != 0)
00142 for (INT i = 0; i < m_Nfft; ++i)
00143 m_NoiseFFT[i] = m_NoiseFFT[i]*rescale + newFFT[i]/(m_NoiseInitFrames+1);
00144 else
00145 for (INT i = 0; i < m_Nfft; ++i)
00146 m_NoiseFFT[i] = newFFT[i];
00147
00148
00149 for (INT i = 0; i < m_Nfft; ++i)
00150 m_NoisePow += m_NoiseFFT[i]*m_NoiseFFT[i];
00151 m_NoisePow /= m_Nfft;
00152
00153
00154
00155 m_SignalPow = pow(10,m_SNR/10) * m_NoisePow;
00156
00157 ++m_NoiseInitFrames;
00158 }
00159
00180 bool
00181 vad::doVAD (pDOUBLE frame)
00182 {
00183
00184 AddToBuffer (frame);
00185
00186
00187
00188
00189
00190 if (m_bufferedFrames < (m_LTWindowLen-1)/2)
00191 {
00192 ++m_bufferedFrames;
00193 return false;
00194 }
00195
00196
00197 LTSE();
00198
00199
00200 DOUBLE ltsd = LTSD();
00201
00202
00203 DOUBLE gamma = Gamma();
00204
00205
00206 if (ltsd+m_offset >= gamma)
00207 {
00208 ActualizeSignal();
00209 return true;
00210 }
00211 else if (m_HoSil < m_Handover)
00212 {
00213 ++m_HoSil;
00214 return true;
00215 }
00216 else
00217 {
00218 ActualizeNoise();
00219 return false;
00220 }
00221 }
00222
00223
00224
00228 void
00229 vad::Destruct()
00230 {
00231 if (m_NoiseFFT) xfree(m_NoiseFFT);
00232 if (m_NoiseFFTAct) xfree(m_NoiseFFTAct);
00233 if (m_LTSE) xfree(m_LTSE);
00234 if (m_fftObject) rfft_destruct(m_fftObject);
00235 }
00236
00237
00238 DOUBLE
00239 vad::Gamma() const
00240 {
00241 if (m_SNR <= m_SNR0)
00242 return m_gamma0;
00243 else if (m_SNR >= m_SNR1)
00244 return m_gamma1;
00245 else
00246 return ((m_gamma0-m_gamma1)/(m_SNR0-m_SNR1)*m_SNR + m_gamma0 - (m_gamma0-m_gamma1)/(1-m_SNR1/m_SNR0));
00247 }
00248
00249 void
00250 vad::ActualizeSignal()
00251 {
00252 INT M = (m_NumVoice < m_ActWindowLen) ? m_NumVoice : m_ActWindowLen;
00253 DOUBLE SignalPAct = 0;
00254
00255 for (int i = (m_LTWindowLen-1)/2-M+1; i <= (m_LTWindowLen-1)/2+1 ; ++i)
00256 {
00257 for (INT j = 0; j < m_Nfft; ++j)
00258 SignalPAct += m_memFFT[i][j]*m_memFFT[i][j];
00259 }
00260 SignalPAct /= ((M+1)*m_Nfft);
00261 m_SignalPow = m_SNRalfa*m_SignalPow + (1-m_SNRalfa)*SignalPAct;
00262
00263
00264 m_SNR = 10*log10(m_SignalPow/m_NoisePow);
00265
00266
00267 ++m_NumVoice;
00268 m_NumSil = 0;
00269 m_HoSil = 0;
00270 }
00271
00272 void
00273 vad::ActualizeNoise()
00274 {
00275
00276 memset (m_NoiseFFTAct, 0, m_Nfft*sizeof(DOUBLE));
00277
00278 INT M = (m_NumSil < m_ActWindowLen) ? m_NumSil : m_ActWindowLen;
00279 DOUBLE NoisePAct = 0;
00280
00281 for (INT i = (m_LTWindowLen-1)/2-M+1; i <= (m_LTWindowLen-1)/2+1 ; ++i)
00282 {
00283 for (INT j = 0; j < m_Nfft; ++j)
00284 {
00285 NoisePAct += m_memFFT[i][j]*m_memFFT[i][j];
00286 m_NoiseFFTAct[j] += m_memFFT[i][j];
00287 }
00288 }
00289
00290
00291 NoisePAct /= ((M+1)*m_Nfft);
00292 for (INT i = 0; i < m_Nfft; ++i)
00293 m_NoiseFFT[i] = m_Nalfa*m_NoiseFFT[i] + (1-m_Nalfa)*m_NoiseFFTAct[i]/(M+1);
00294
00295 m_NoisePow = m_Nalfa*m_NoisePow + (1-m_Nalfa)*NoisePAct;
00296
00297
00298 m_SNR = 10*log10(m_SignalPow/m_NoisePow);
00299
00300
00301 ++m_NumSil;
00302 m_NumVoice = 0;
00303 }
00304
00305 void
00306 vad::AddToBuffer (pDOUBLE frame)
00307 {
00308
00309 rfft_rfft (m_fftObject, frame);
00310 rfft_mag (m_fftObject);
00311 pDOUBLE newFFT = rfft_getrevec (m_fftObject);
00312
00313
00314 m_memFFT.Push(newFFT);
00315 }
00316
00317 void
00318 vad::LTSE ()
00319 {
00320
00321 memset (m_LTSE, 0, m_Nfft*sizeof(DOUBLE));
00322
00323
00324 for (INT i = 0; i < m_LTWindowLen; ++i)
00325 {
00326 for (INT j = 0; j < m_Nfft; ++j)
00327 if (m_memFFT[i][j] > m_LTSE[j])
00328 m_LTSE[j] = m_memFFT[i][j];
00329 }
00330
00331
00332
00333 }
00334
00335 DOUBLE
00336 vad::LTSD () const
00337 {
00338 DOUBLE sal = 0;
00339 for (INT i = 0; i < m_Nfft; ++i)
00340 sal += (m_LTSE[i]*m_LTSE[i]) / (m_NoiseFFT[i]*m_NoiseFFT[i]);
00341 sal = 10*log10(sal/m_Nfft);
00342 return sal;
00343 }
00344