欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页  >  IT编程

音频自动增益 与 静音检测 算法 附完整C代码

程序员文章站 2022-06-19 08:40:45
前面分享过一个算法《音频增益响度分析 ReplayGain 附完整C代码示例》 主要用于评估一定长度音频的音量强度, 而分析之后,很多类似的需求,肯定是做音频增益,提高音量诸如此类做法。 不过在项目实测的时候,其实真的很难定标准, 到底在什么样的环境下,要增大音量,还是降低。 在通讯行业一般的做法就 ......

前面分享过一个算法《音频增益响度分析 ReplayGain 附完整C代码示例

主要用于评估一定长度音频的音量强度,

而分析之后,很多类似的需求,肯定是做音频增益,提高音量诸如此类做法。

不过在项目实测的时候,其实真的很难定标准,

到底在什么样的环境下,要增大音量,还是降低。

在通讯行业一般的做法就是采用静音检测,

一旦检测为静音或者噪音,则不做处理,反之通过一定的策略进行处理。

这里就涉及到两个算法,一个是静音检测,一个是音频增益。

增益其实没什么好说的,类似于数据归一化拉伸的做法。

静音检测 在WebRTC中 是采用计算GMM (Gaussian Mixture Model,高斯混合模型)进行特征提取的。

在很长一段时间里面,音频特征 有3个主要的方法,

GMM  ,Spectrogram (声谱图), MFCC 即 Mel-Frequency Cepstrum(Mel频率倒谱)

恕我直言,GMM 提取的特征,其鲁棒性 不如后两者。

也不多做介绍,感兴趣的同学,翻翻 * ,补补课。

当然在实际使用算法时,会由此延伸出来一些小技巧。

例如,用静音检测 来做音频裁剪,或者搭配音频增益做一些音频增强之类的操作。

自动增益在WebRTC 源代码文件是:analog_agc.c 和 digital_agc.c

静音检测 源代码文件是: webrtc_vad.c

这个命名,有一定的历史原因了。

经过梳理后,

增益算法为 agc.c agc.h

静音检测为 vad.c vad.h

增益算法的完整示例代码:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
//采用https://github.com/mackron/dr_libs/blob/master/dr_wav.h 解码
#define DR_WAV_IMPLEMENTATION
#include "dr_wav.h"
#include "agc.h"

#ifndef nullptr
#define nullptr 0
#endif

#ifndef MIN
#define  MIN(A, B)        ((A) < (B) ? (A) : (B))
#endif

//写wav文件
void wavWrite_int16(char *filename, int16_t *buffer, size_t sampleRate, size_t totalSampleCount) {
    drwav_data_format format = {};
    format.container = drwav_container_riff;     // <-- drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64.
    format.format = DR_WAVE_FORMAT_PCM;          // <-- Any of the DR_WAVE_FORMAT_* codes.
    format.channels = 1;
    format.sampleRate = (drwav_uint32) sampleRate;
    format.bitsPerSample = 16;
    drwav *pWav = drwav_open_file_write(filename, &format);
    if (pWav) {
        drwav_uint64 samplesWritten = drwav_write(pWav, totalSampleCount, buffer);
        drwav_uninit(pWav);
        if (samplesWritten != totalSampleCount) {
            fprintf(stderr, "ERROR\n");
            exit(1);
        }
    }
}

//读取wav文件
int16_t *wavRead_int16(char *filename, uint32_t *sampleRate, uint64_t *totalSampleCount) {
    unsigned int channels;
    int16_t *buffer = drwav_open_and_read_file_s16(filename, &channels, sampleRate, totalSampleCount);
    if (buffer == nullptr) {
        printf("读取wav文件失败.");
    }
    //仅仅处理单通道音频
    if (channels != 1) {
        drwav_free(buffer);
        buffer = nullptr;
        *sampleRate = 0;
        *totalSampleCount = 0;
    }
    return buffer;
}

//分割路径函数
void splitpath(const char *path, char *drv, char *dir, char *name, char *ext) {
    const char *end;
    const char *p;
    const char *s;
    if (path[0] && path[1] == ':') {
        if (drv) {
            *drv++ = *path++;
            *drv++ = *path++;
            *drv = '\0';
        }
    } else if (drv)
        *drv = '\0';
    for (end = path; *end && *end != ':';)
        end++;
    for (p = end; p > path && *--p != '\\' && *p != '/';)
        if (*p == '.') {
            end = p;
            break;
        }
    if (ext)
        for (s = end; (*ext = *s++);)
            ext++;
    for (p = end; p > path;)
        if (*--p == '\\' || *p == '/') {
            p++;
            break;
        }
    if (name) {
        for (s = p; s < end;)
            *name++ = *s++;
        *name = '\0';
    }
    if (dir) {
        for (s = path; s < p;)
            *dir++ = *s++;
        *dir = '\0';
    }
}


int agcProcess(int16_t *buffer, uint32_t sampleRate, size_t samplesCount, int16_t agcMode) {
    if (buffer == nullptr) return -1;
    if (samplesCount == 0) return -1;
    WebRtcAgcConfig agcConfig;
    agcConfig.compressionGaindB = 9; // default 9 dB
    agcConfig.limiterEnable = 1; // default kAgcTrue (on)
    agcConfig.targetLevelDbfs = 3; // default 3 (-3 dBOv)
    int minLevel = 0;
    int maxLevel = 255;
    size_t samples = MIN(160, sampleRate / 100);
    if (samples == 0) return -1;
    const int maxSamples = 320;
    int16_t *input = buffer;
    size_t nTotal = (samplesCount / samples);
    void *agcInst = WebRtcAgc_Create();
    if (agcInst == NULL) return -1;
    int status = WebRtcAgc_Init(agcInst, minLevel, maxLevel, agcMode, sampleRate);
    if (status != 0) {
        printf("WebRtcAgc_Init fail\n");
        WebRtcAgc_Free(agcInst);
        return -1;
    }
    status = WebRtcAgc_set_config(agcInst, agcConfig);
    if (status != 0) {
        printf("WebRtcAgc_set_config fail\n");
        WebRtcAgc_Free(agcInst);
        return -1;
    }
    size_t num_bands = 1;
    int inMicLevel, outMicLevel = -1;
    int16_t out_buffer[maxSamples];
    int16_t *out16 = out_buffer;
    uint8_t saturationWarning = 1;                 //是否有溢出发生,增益放大以后的最大值超过了65536
    int16_t echo = 0;                                 //增益放大是否考虑回声影响
    for (int i = 0; i < nTotal; i++) {
        inMicLevel = 0;
        int nAgcRet = WebRtcAgc_Process(agcInst, (const int16_t *const *) &input, num_bands, samples,
                                        (int16_t *const *) &out16, inMicLevel, &outMicLevel, echo,
                                        &saturationWarning);

        if (nAgcRet != 0) {
            printf("failed in WebRtcAgc_Process\n");
            WebRtcAgc_Free(agcInst);
            return -1;
        }
        memcpy(input, out_buffer, samples * sizeof(int16_t));
        input += samples;
    }
    WebRtcAgc_Free(agcInst);
    return 1;
}

void auto_gain(char *in_file, char *out_file) {
    //音频采样率
    uint32_t sampleRate = 0;
    //总音频采样数
    uint64_t inSampleCount = 0;
    int16_t *inBuffer = wavRead_int16(in_file, &sampleRate, &inSampleCount);
    //如果加载成功
    if (inBuffer != nullptr) {
        //  kAgcModeAdaptiveAnalog  模拟音量调节
        //  kAgcModeAdaptiveDigital 自适应增益
        //  kAgcModeFixedDigital 固定增益
        agcProcess(inBuffer, sampleRate, inSampleCount, kAgcModeAdaptiveDigital);
        wavWrite_int16(out_file, inBuffer, sampleRate, inSampleCount);
        free(inBuffer);
    }
}

int main(int argc, char *argv[]) {
    printf("WebRTC Automatic Gain Control\n");
    printf("博客:http://cpuimage.cnblogs.com/\n");
    printf("音频自动增益\n");
    if (argc < 2)
        return -1;
    char *in_file = argv[1];
    char drive[3];
    char dir[256];
    char fname[256];
    char ext[256];
    char out_file[1024];
    splitpath(in_file, drive, dir, fname, ext);
    sprintf(out_file, "%s%s%s_out%s", drive, dir, fname, ext);
    auto_gain(in_file, out_file);

    printf("按任意键退出程序 \n");
    getchar();
    return 0;
}

 

 静音检测完整示例代码:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
//采用https://github.com/mackron/dr_libs/blob/master/dr_wav.h 解码
#define DR_WAV_IMPLEMENTATION

#include "dr_wav.h"
#include "vad.h"

#ifndef nullptr
#define nullptr 0
#endif

#ifndef MIN
#define  MIN(A, B)        ((A) < (B) ? (A) : (B))
#endif

#ifndef MAX
#define  MAX(A, B)        ((A) > (B) ? (A) : (B))
#endif


//读取wav文件
int16_t *wavRead_int16(char *filename, uint32_t *sampleRate, uint64_t *totalSampleCount) {
    unsigned int channels;
    int16_t *buffer = drwav_open_and_read_file_s16(filename, &channels, sampleRate, totalSampleCount);
    if (buffer == nullptr) {
        printf("读取wav文件失败.");
    }
    //仅仅处理单通道音频
    if (channels != 1) {
        drwav_free(buffer);
        buffer = nullptr;
        *sampleRate = 0;
        *totalSampleCount = 0;
    }
    return buffer;
}


int vadProcess(int16_t *buffer, uint32_t sampleRate, size_t samplesCount, int16_t vad_mode, int per_ms_frames) {
    if (buffer == nullptr) return -1;
    if (samplesCount == 0) return -1;
    // kValidRates : 8000, 16000, 32000, 48000
    // 10, 20 or 30 ms frames
    per_ms_frames = MAX(MIN(30, per_ms_frames), 10);
    size_t samples = sampleRate * per_ms_frames / 1000;
    if (samples == 0) return -1;
    int16_t *input = buffer;
    size_t nTotal = (samplesCount / samples);

    void *vadInst = WebRtcVad_Create();
    if (vadInst == NULL) return -1;
    int status = WebRtcVad_Init(vadInst);
    if (status != 0) {
        printf("WebRtcVad_Init fail\n");
        WebRtcVad_Free(vadInst);
        return -1;
    }
    status = WebRtcVad_set_mode(vadInst, vad_mode);
    if (status != 0) {
        printf("WebRtcVad_set_mode fail\n");
        WebRtcVad_Free(vadInst);
        return -1;
    }
    printf("Activity : \n");
    for (int i = 0; i < nTotal; i++) {
        int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples);
        if (nVadRet == -1) {
            printf("failed in WebRtcVad_Process\n");
            WebRtcVad_Free(vadInst);
            return -1;
        } else {
            // output result
            printf(" %d \t", nVadRet);
        }
        input += samples;
    }
    printf("\n");
    WebRtcVad_Free(vadInst);
    return 1;
}

void vad(char *in_file) {
    //音频采样率
    uint32_t sampleRate = 0;
    //总音频采样数
    uint64_t inSampleCount = 0;
    int16_t *inBuffer = wavRead_int16(in_file, &sampleRate, &inSampleCount);
    //如果加载成功
    if (inBuffer != nullptr) {
        //    Aggressiveness mode (0, 1, 2, or 3)
        int16_t mode = 1;
        int per_ms = 30;
        vadProcess(inBuffer, sampleRate, inSampleCount, mode, per_ms);
        free(inBuffer);
    }
}

int main(int argc, char *argv[]) {
    printf("WebRTC Voice Activity Detector\n");
    printf("博客:http://cpuimage.cnblogs.com/\n");
    printf("静音检测\n");
    if (argc < 2)
        return -1;
    char *in_file = argv[1];
    vad(in_file);
    printf("按任意键退出程序 \n");
    getchar();
    return 0;
}

自动增益项目地址:https://github.com/cpuimage/WebRTC_AGC

具体流程为: 

加载wav(拖放wav文件到可执行文件上)->增益处理->保存为_out.wav文件

 

静音检测项目地址:https://github.com/cpuimage/WebRTC_VAD

具体流程为: 

加载wav(拖放wav文件到可执行文件上)->输出静音检测结果

备注 :1 为非静音,0 为静音

 该注意的地方和参数,见代码注释。

用cmake即可进行编译示例代码,详情见CMakeLists.txt。

 

若有其他相关问题或者需求也可以邮件联系俺探讨。

邮箱地址是: 
gaozhihan@vip.qq.com