function [d,sr,SNR,fshift] = renoiser(varargin) % [d,sr,SNR,fshift] = renoiser(varargin) % Utility to analyze and resynthesize noisy speech signals % % Usage modes % renoiser -mix mixfile -clean cleanfile % -noiseout noisefile -filterout filterfile -targetout targetfile % .. returns separated noise & filter & prints SNR % renoiser -noise noisefile -target targetfile % -SNR SNR -mixout mixfile % .. recombines filtered speech & noise to achieve % specified SNR % renoiser -noise noisefile -clean cleanfile -filter filterfile % -SNR SNR -mixout mixfile % .. resynthesizes new speech+noise output with % filter & SNR % % See http://labrosa.ee.columbia.edu/projects/renoiser/ % for full documentation % % 2011-02-10 Dan Ellis dpwe@ee.columbia.edu % $Header: $ VERSION = 0.2; DATE = 20110803; % Parse out the optional arguments [CLEANIN, ... TARGETIN, TARGETOUT, ... MIXIN, MIXOUT, ... NOISEIN, NOISEOUT, ... FILTERIN, FILTEROUT, ... START, END, ... SNRout, fshift, DISP, ... TARGETSR, NOISEFLOOR, CHECKFSHIFT, ... CLEANLIST, MIXOUTDIR, LAUNDERNOISE] = ... process_options(varargin, ... '-clean', '', ... '-target', '', '-targetout', '', ... '-mix', '', '-mixout', '', ... '-noise', '', '-noiseout', '', ... '-filter', '', '-filterout', '', ... '-start', 0, '-end', 0, ... '-SNR', 0, '-fshift', 0, '-disp', 0, ... '-targetsr', 0, '-noisefloor', -60, '-checkfshift', 0, ... '-cleanlist', '', '-mixoutdir', '', '-laundernoise',0); % initialize output vars (to placate runtime) d = []; sr = []; SNR = []; %fshift = []; if ~isempty(CLEANLIST) cleanfiles = textread(CLEANLIST, '%s'); else cleanfiles = {CLEANIN}; end for cx = 1:length(cleanfiles) THISCLEANIN = cleanfiles{cx}; disp(['++++++ renoiser v',num2str(VERSION),' ++++++']); didsomething = 0; % keep track of state havenoise = 0; havefilt = 0; havetarg = 0; haveclean = 0; donelaundry = 0; %ndisp = 5; ndisp = 4; dispn = 0; sr = TARGETSR; % If we specified an output dir, construct the corresponding % filename in it if ~isempty(MIXOUTDIR) [p,n,e] = fileparts(THISCLEANIN); e = '.wav'; %MIXOUT = fullfile(MIXOUTDIR,[n,e]); % include the entire path from cleanin as part of the output path MIXOUT = fullfile(MIXOUTDIR, p, [n,e]); end if ~isempty(THISCLEANIN) % actually, we need a clean for EVERYTHING disp(['Reading CLEAN from ',THISCLEANIN,' ...']); [dclean,sr] = audioread(THISCLEANIN, sr); haveclean = 1; dclean = applystartend(dclean, sr, START, END); % add white noise at the specified noise floor level % this is to avoid instabilities for frequencies where clean % has no energy, but mix does. dclean = dclean + randn(length(dclean),1) * ... sqrt(activlev(dclean,sr))*(10^(NOISEFLOOR/20)); %dispn = maybedisp(dclean,sr,dispn,ndisp,DISP,['clean - ',THISCLEANIN]); end % which mode? if ~isempty(MIXIN) && haveclean % mix + clean to give noise + filter + SNR disp(['Reading MIX from ',MIXIN,' ...']); [dmix,sr] = audioread(MIXIN, sr); dmix = applystartend(dmix, sr, START, END); disp('Identifying CLEAN in MIX...'); if CHECKFSHIFT % checking for frequency shift is slow, so only do it if requested [dnoise, dtarg, dfilt, SNRin, delay, fshift] = ... find_in_mix_tf(dmix, dclean, sr); disp(['Mix freq shift= ',sprintf('%.1f',fshift),' Hz']); else [dnoise, dtarg, dfilt, SNRin, delay] = ... find_in_mix(dmix, dclean, sr); fshift = 0; end disp(['Mix delay= ',sprintf('%.3f',delay),' s']); dispn = maybedisp(dmix,sr,dispn,ndisp,DISP,... ['Mix: delay=',sprintf('%.3f',delay), ' s, '... 'SNR=',sprintf('%.1f',SNRin),' dB - ',MIXIN]); SNR = SNRin; % returned value if LAUNDERNOISE > 0 && ~donelaundry dnoise = laundernoise(dnoise, sr, LAUNDERNOISE); donelaundry = 1; end % save results newgain = 1; if ~isempty(FILTEROUT) if max(abs(dfilt)) > 1 newgain = 32767 / 32768 / max(abs(dfilt)); disp(['Warning: reducing gain of filter (and noise, and target) by ', ... num2str(newgain),' x to avoid clipping.']); end audiowrite(newgain*dfilt,sr, FILTEROUT); disp(['FILTER saved to ',FILTEROUT]); end if ~isempty(NOISEOUT) audiowrite(newgain*dnoise, sr, NOISEOUT); disp(['NOISE saved to ',NOISEOUT]); end if ~isempty(TARGETOUT) audiowrite(newgain*dtarg,sr, TARGETOUT); disp(['TARGET saved to ',TARGETOUT]); end disp(['Input mix SNR= ',sprintf('%.2f',SNRin),' dB']); havenoise = 1; havefilt = 1; havetarg = 1; % if just analyzing, return noise if nargout > 0 d = dnoise; end didsomething = 1; dispn = maybedisp(dnoise,sr,dispn,ndisp,DISP,['noise - ',NOISEOUT]); dispn = maybedisp(dtarg,sr,dispn,ndisp,DISP,['target - ',TARGETOUT]); if DISP if dispn <= ndisp dispn = dispn+1; subplot(ndisp,1,dispn); plot([1:length(dfilt)]/sr,dfilt); title(['filter - ',FILTEROUT]); end end end % Maybe read a filter? if ~isempty(FILTERIN) if havefilt disp(['Replacing FILTER from ',FILTERIN,' ...']); else disp(['Reading FILTER from ',FILTERIN,' ...']); end [dfilt,sr] = audioread(FILTERIN, sr); havefilt = 1; elseif ~havefilt dfilt = [1]; % default - just copy havefilt = 1; disp(['No FILTER - just copying CLEAN to TARGET']); end % Maybe read noise? if ~isempty(NOISEIN) if havenoise disp(['Replacing NOISE from ',NOISEIN,' ...']); else disp(['Reading NOISE from ',NOISEIN,' ...']); end [dnoise,sr] = audioread(NOISEIN, sr); % Don't apply -start, -end havenoise = 1; end % Maybe read target? if ~isempty(TARGETIN) if havetarg disp(['Replacing TARGET from ',TARGETIN,' ...']); else disp(['Reading TARGET from ',TARGETIN,' ...']); end [dtarg,sr] = audioread(TARGETIN, sr); % Apply -start, -end? probably not, probably target was already trimmed havetarg = 1; end % Create a target by filtering clean? if ~havetarg && havefilt && haveclean % apply filter disp('Filtering CLEAN to produce target...'); dtarg = filter(dfilt,1,dclean); havetarg = 1; elseif ~isempty(FILTERIN) disp('WARN: filter input specified but not used'); end if havenoise && havetarg && ~isempty(MIXOUT) % noise + target (e.g. filtered clean) + SNR to give noisy speech disp(['Creating new output mix at SNR ',num2str(SNRout),' dB ...']) if LAUNDERNOISE && ~donelaundry dnoise = laundernoise(dnoise, sr, LAUNDERNOISE); donelaundry = 1; end dmix = mix_noise(dtarg, dnoise, sr, SNRout); if fshift ~= 0 disp(['Applying frequency shift of ',num2str(fshift),' Hz to output ...']); dmix = freqshift(dmix, sr, fshift); end if ~isempty(MIXOUT) audiowrite(dmix,sr,MIXOUT); disp(['MIX saved to ',MIXOUT]); end % if mixing, return result of mix if nargout > 0 d = dmix; end didsomething = 1; end if ~didsomething argv0 = 'renoiser'; disp('Error: unrecognized parameter combination'); disp('Usage:'); disp([argv0,' -mix mixfile -clean cleanfile -noiseout noisefile']); disp(' -filterout filterfile -cleanout cleanoutfile'); disp(' .. calculates the optimal alignment and filtering to predict'); disp(' the mix, and returns the residual noise, and the filter'); disp(' definition. It also reports the effective calculated SNR'); disp(''); disp([argv0,' -noise noisefile -clean cleanfile -filter filterfile']); disp(' -SNR SNRlevel -mixout mixfile'); disp(' .. constructs a new filtered/noisy speech file by applying'); disp(' the specified filter to the clean speech, then mixing it'); disp(' to achieve the specified SNR (post-filtering, using '); disp(' P50-style speech level measurement)'); disp(''); disp([argv0,' -noise noisefile -target targfile -SNR SNRlevel -mixout mixfile']); disp(' .. constructs a new filtered/noisy speech file by combining target'); disp(' and noise without filtering.'); end % if DISP % disp('Press any key to continue...'); % pause; % end end % THISCLEANIN loop %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function y = applystartend(x,sr,START,END) % Trim a waveform to match specified start/end times y = x; if START > 0 || END > START if END < START; END = length(y)/sr; end y = y((round(START*sr)+1):round(END*sr)); end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function dispnout = maybedisp(d,sr,dispnin,ndisp,DISP,tit) % Maybe add a spectrogram and move down fmax = 4000; dbrange = 80; % 32 ms FFT window len fftwinsec = 0.032; fftlen = 2^(round(log(sr*fftwinsec)/log(2))); dispnout = 0; if DISP if dispnin < ndisp dispnout = dispnin+1; subplot(ndisp,1,dispnout); specgram(d,fftlen,sr); ax = axis(); ax(4) = fmax; axis(ax); ca = caxis(); ca(1) = ca(2)-dbrange; caxis(ca); %colorbar; title(tit,'interpreter','none'); end end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function y = laundernoise(dnoise,sr,noiseanwinsec) % function to replace noise with LPC-filtered white-noise analog % (to remove excessive nonlinear structure) % called from two places, hence a subroutine disp('Analyzing/resynthesizing NOISE to launder it...'); %noiseanwinsec = 0.050; noiseanwin = 2*round(sr*noiseanwinsec/2); noisepoles = 20; % or scale with SR? % pad with a half-frame of reflected signal fpad = dnoise(noiseanwin/2:-1:1); epad = dnoise(end:-1:end-noiseanwin/2); [a,g] = lpcfit([fpad;dnoise;epad]', noisepoles, noiseanwin/2); % hop=win/2 y = lpcsynth(a,g,[],noiseanwin/2); % strip out padding y = y(noiseanwin/2+[1:length(dnoise)])';