Hi, I am also working on a speech recognition project.
This is complete MATLAB code for MFCC, and I have a PDF book that explains all the steps of feature extraction using MFCC.
function [ceps,freqresp,fb,fbrecon,freqrecon] = ...
    mfcc2(input, samplingRate, frameRate, x)
% MFCC2  Mel-frequency cepstral coefficients of a one-dimensional signal.
%
%   [ceps,freqresp,fb,fbrecon,freqrecon] = mfcc2(input,samplingRate,frameRate,x)
%
%   Inputs
%     input        - signal samples (row or column vector).
%     samplingRate - sampling rate in Hz (default 16000).
%     frameRate    - analysis frames per second (default 100).
%     x            - number of cepstral coefficients to keep (default 13).
%
%   Outputs (one column per analysis frame)
%     ceps      - x-by-cols matrix of cepstral coefficients.
%     freqresp  - (fftSize/2)-by-cols FFT magnitudes of each windowed frame.
%     fb        - totalFilters-by-cols log10 filter-bank outputs.
%     fbrecon   - filter-bank outputs reconstructed from ceps through the
%                 transposed DCT matrix.
%     freqrecon - (fftSize/2)-by-cols spectrum interpolated from fbrecon.
%
%   Side effect: the globals mfccDCTMatrix and mfccFilterWeights are
%   overwritten on every call.

global mfccDCTMatrix mfccFilterWeights

% Defaults for omitted (or empty) optional arguments.
if (nargin < 2) || isempty(samplingRate), samplingRate = 16000; end
if (nargin < 3) || isempty(frameRate),    frameRate = 100;      end
if (nargin < 4) || isempty(x),            x = 13;               end

% The framing code below multiplies by a row-vector window, so force the
% signal into a row regardless of how the caller supplied it.
input = input(:).';

% Filter bank parameters.
lowestFrequency = 133.3333;
linearFilters = 13;
linearSpacing = 66.66666666;
logFilters = 27;
logSpacing = 1.0711703;
fftSize = 512;
cepstralCoefficients = x;
windowSize = 256;           % samples per analysis window

totalFilters = linearFilters + logFilters;

% Band edges: linearly spaced first, then geometrically spaced.  Lower,
% center and upper edges of each filter are consecutive entries of freqs.
freqs = lowestFrequency + (0:linearFilters-1)*linearSpacing;
freqs(linearFilters+1:totalFilters+2) = ...
    freqs(linearFilters) * logSpacing.^(1:logFilters+2);

lower  = freqs(1:totalFilters);
center = freqs(2:totalFilters+1);
upper  = freqs(3:totalFilters+2);

% Triangular weights; the height 2/(upper-lower) gives each triangle
% unit area.
mfccFilterWeights = zeros(totalFilters, fftSize);
triangleHeight = 2./(upper-lower);
fftFreqs = (0:fftSize-1)/fftSize*samplingRate;

for chan = 1:totalFilters
    rising  = (fftFreqs > lower(chan) & fftFreqs <= center(chan)) .* ...
        triangleHeight(chan).*(fftFreqs-lower(chan))/(center(chan)-lower(chan));
    falling = (fftFreqs > center(chan) & fftFreqs < upper(chan)) .* ...
        triangleHeight(chan).*(upper(chan)-fftFreqs)/(upper(chan)-center(chan));
    mfccFilterWeights(chan,:) = rising + falling;
end

hamWindow = 0.54 - 0.46*cos(2*pi*(0:windowSize-1)/windowSize);

% DCT matrix, cepstralCoefficients-by-totalFilters.  Entry (i,j), counting
% from zero, is cos(i*(j+0.5)*pi/totalFilters), scaled by
% 1/sqrt(totalFilters/2); the first row gets an extra factor sqrt(2)/2.
mfccDCTMatrix = 1/sqrt(totalFilters/2)*cos((0:(cepstralCoefficients-1))' * ...
    (2*(0:(totalFilters-1))+1) * pi/2/totalFilters);
mfccDCTMatrix(1,:) = mfccDCTMatrix(1,:) * sqrt(2)/2;

% Pre-emphasis filter, then work out how many frames fit in the signal.
preEmphasized = filter([1 -.97], 1, input);
windowStep = samplingRate/frameRate;
% Clamp at zero so a signal shorter than one window yields empty outputs.
cols = max(0, fix((length(input)-windowSize)/windowStep));

% Allocate the requested outputs.
ceps = zeros(cepstralCoefficients, cols);
if (nargout > 1), freqresp = zeros(fftSize/2, cols); end
if (nargout > 2), fb = zeros(totalFilters, cols); end

% For the spectral reconstruction, map every FFT bin frequency to a
% fractional position between filter-bank center frequencies, split into
% an integer index (fri) and an interpolation weight (frac).
if (nargout > 4)
    fr = (0:(fftSize/2-1))'/(fftSize/2)*samplingRate/2;
    band = 1;
    for bin = 1:(fftSize/2)
        if fr(bin) > center(band+1)
            band = band + 1;
        end
        if band > totalFilters-1
            band = totalFilters-1;
        end
        % Clamp to [1, totalFilters) so that fri+1 stays a valid index.
        fr(bin) = min(totalFilters-.0001, ...
            max(1, band + (fr(bin)-center(band))/(center(band+1)-center(band))));
    end
    fri = fix(fr);
    frac = fr - fri;
    freqrecon = zeros(fftSize/2, cols);
end

% Per frame: window, zero-pad to fftSize, FFT magnitude, filter bank,
% log10, then DCT.
for frame = 0:cols-1
    % fix() keeps the index integral when samplingRate/frameRate is not.
    first = fix(frame*windowStep) + 1;
    last = first + windowSize - 1;
    fftData = zeros(1, fftSize);
    fftData(1:windowSize) = preEmphasized(first:last) .* hamWindow;
    fftMag = abs(fft(fftData));
    earMag = log10(mfccFilterWeights * fftMag');

    ceps(:,frame+1) = mfccDCTMatrix * earMag;
    if (nargout > 1), freqresp(:,frame+1) = fftMag(1:fftSize/2)'; end
    if (nargout > 2), fb(:,frame+1) = earMag; end
    if (nargout > 4)
        % Undo the log10 on this frame's reconstructed filter-bank column
        % and interpolate between neighbouring filters.
        f10 = 10.^(mfccDCTMatrix' * ceps(:,frame+1));
        freqrecon(:,frame+1) = samplingRate/fftSize * ...
            (f10(fri).*(1-frac) + f10(fri+1).*frac);
    end
end

% Reconstruct the filter-bank outputs for all frames at once by applying
% the transposed DCT matrix to the cepstra.
if (nargout > 3)
    fbrecon = mfccDCTMatrix' * ceps;
end
end