kareem dsp
Junior Member level 1
hi all
I need MATLAB code for feature extraction using MFCC, so that I can use these features in dialect recognition.
I need MATLAB code for feature extraction using MFCC, so that I can use these features in dialect recognition.
Follow along with the video below to see how to install our site as a web app on your home screen.
Note: This feature currently requires accessing the site using the built-in Safari browser.
% Record audio from the Windows sound card through the legacy Data
% Acquisition Toolbox interface and leave the samples in 'data'.
fs = 16000;                               % Sampling rate in Hz (also used by the mel filter bank)
AI = analoginput('winsound',1);           % Analog-input object for sound device 1
try
    addchannel(AI,1);                     % Acquire hardware channel 1 only
    set(AI,'samplerate',fs);
    set(AI,'samplespertrigger',2*fs);     % Two seconds' worth of samples per trigger
    set(AI,'triggertype','immediate');    % Begin logging as soon as start() is called
    start(AI);
    data = getdata(AI);                   % Blocks until the requested samples are available
catch acqErr
    % Release the sound device even when acquisition fails, then report the error.
    delete(AI);
    clear AI
    rethrow(acqErr);
end
% The device object is no longer needed; free it so the sound card is not
% kept open for the rest of the MATLAB session.
delete(AI);
clear AI
%file = sprintf('%s%d.wav','database_new kar',i);
%%wavwrite(data,16000,file);
%% Computing MFCC Co-efficients..
%% (1) Frame Blocking..
% Cut the recording into frames of N samples, a new frame starting every M
% samples, and store one frame per row of Frames. The final row holds the
% tail of the signal, zero-padded up to N samples.
N  = 256;            % Frame length in samples (also the FFT size)
M  = 100;            % Hop between frame starts; neighbouring frames overlap by N-M samples
NN = floor(N/2+1);   % N/2+1, length of the one-sided spectrum
sig    = data(:);    % Treat the recording as a single column of samples
sigLen = length(sig);
% Number of frames that precede the zero-padded one. Clamped at zero so a
% recording shorter than N produces one padded frame instead of a negative
% matrix size and out-of-range indices.
nbFrames = max(ceil((sigLen-N)/M), 0);
% Zero-pad the signal so that the last frame can be read like any other.
padded = zeros(nbFrames*M+N, 1);
padded(1:sigLen) = sig;
% Row k of sampleIdx lists the sample positions that make up frame k.
sampleIdx = repmat((0:nbFrames)'*M, 1, N) + repmat(1:N, nbFrames+1, 1);
% reshape keeps the (frames x samples) layout even when there is one frame.
Frames = reshape(padded(sampleIdx), nbFrames+1, N);
%% (2) Windowing..
% Taper every frame (one per row of Frames) with a Hamming window of the
% same length; the result is stored row-wise in Windows.
[nbFrames, nbSamples] = size(Frames);
frameSize = [nbFrames, nbSamples];
% Hamming window..
w = hamming(nbSamples);
% Replicate the window down the rows so all frames are scaled in one step.
Windows = Frames .* repmat(w', nbFrames, 1);
%% (3) Fourier Transform..
% fft works down columns, so the frames are flipped to one frame per column.
ffts = fft(ctranspose(Windows));
%% (4) Mel-frequency Wrapping..
% (a) Calculate Power spectrum..
% Keep only rows 1..NN-1 of the spectrum and square their magnitudes.
PowSpecs = abs(ffts(1:NN-1,:)).^2;
% (b) Mel filter generation
% Build nof_c triangular filters whose centres are equally spaced on the mel
% scale. Row ch of W holds the weights of filter ch over the Nmax spectrum bins.
nof_c = 20;          % Number of channels..
df    = fs/N;        % Width of one FFT bin in Hz
Nmax  = N/2;         % Number of spectrum bins each filter spans (columns of W)
fmax  = fs/2;        % Highest frequency covered by the bank (half the sampling rate)
% Convert to mel scale..
melmax     = 2595*log10(1+fmax/700);
melinc     = melmax/(nof_c+1);
melcenters = (1:nof_c).*melinc;
% Convert to frequency scale..
fcenters = 700*((10.^(melcenters./2595))-1);
centerf  = round(fcenters./df);
% Keep every centre inside the valid bin range; with a coarse FFT the lowest
% centre can round to 0, which is not a valid index.
% NOTE(review): round(f/df) is used directly as a 1-based bin index, whereas
% row k of an fft output corresponds to (k-1)*df, so each filter sits one bin
% high. Left unchanged to stay comparable with existing codebooks - confirm.
centerf = min(max(centerf, 1), Nmax);
startf  = [1,centerf(1:nof_c-1)];
stopf   = [centerf(2:nof_c),Nmax];
W = zeros(nof_c,Nmax);
% Making filter..
for ch = 1:nof_c
    % Rising edge: 0 at startf up to 1 at centerf. Skipped when the edge has
    % zero width, which would otherwise evaluate 0*Inf = NaN.
    riseWidth = centerf(ch)-startf(ch);
    if riseWidth > 0
        bins = startf(ch):centerf(ch);
        W(ch,bins) = (bins-startf(ch))/riseWidth;
    end
    % Falling edge: 1 at centerf down to 0 at stopf. The previous expression
    % (j-centerf)*decrement climbed from 0 to 1 instead, wiping out the peak
    % and producing "cut-off" ramps rather than triangles.
    fallWidth = stopf(ch)-centerf(ch);
    if fallWidth > 0
        bins = centerf(ch):stopf(ch);
        W(ch,bins) = (stopf(ch)-bins)/fallWidth;
    end
    % The apex is always 1, including for degenerate zero-width edges.
    W(ch,centerf(ch)) = 1;
end
% Normalising..
% Scale each filter to unit sum; the sum is at least 1 because of the apex.
for ch = 1:nof_c
    W(ch,:) = W(ch,:)/sum(W(ch,:));
end
% (c) Apply mel filters to Power spectrum coeffs..
% Each column of PowSpecs is one frame, so each column of melPowSpecs holds
% that frame's filter-bank energies.
melPowSpecs = W*PowSpecs;
% (d) MFCC calculations..
% Floor the energies at eps before the logarithm: a silent (all-zero) frame
% would otherwise give log(0) = -Inf and turn its cepstrum into NaN/Inf.
melCeps = dct(log(max(melPowSpecs, eps)));
% Discard the first DCT coefficient of every frame.
melCeps(1,:) = [];
% Compare the extracted coefficients against every stored codebook and pick
% the entry with the smallest average nearest-codeword distance.
load CodeBook
% The loop below indexes a cell array named 'codebook'; fail with a clear
% message if the MAT-file did not provide one.
if ~exist('codebook','var') || ~iscell(codebook)
    error('MFCC:codebookMissing', ...
          'CodeBook.mat must define a cell array named ''codebook''.');
end
dist_min = inf;
spkr = 0;                                  % 0 means "no entry selected"
dist_val = zeros(1,length(codebook));      % Average distance per codebook entry
[M1, N1] = size(melCeps);                  % M1 coefficients per frame, N1 frames
for ind = 1:length(codebook)
    single_cb = codebook{ind};
    [M2, N2] = size(single_cb);            % presumably M2 coefficients x N2 codewords - confirm
    if M1 ~= M2
        error('MFCC:dimensionMismatch', ...
              'Codebook %d has %d rows but the features have %d coefficients.', ...
              ind, M2, M1);
    end
    % dist_temp(a,b) = Euclidean distance between frame a and codeword b.
    dist_temp = zeros(N1,N2);
    if N1<N2
        % Fewer frames than codewords: loop over frames.
        for n1 = 1:N1
            delta = repmat(melCeps(:,n1),1,N2) - single_cb;
            dist_temp(n1,:) = sum(delta.^2,1);
        end
    else
        % Fewer (or equally many) codewords: loop over codewords.
        for n2 = 1:N2
            delta = melCeps - repmat(single_cb(:,n2),1,N1);
            dist_temp(:,n2) = sum(delta.^2,1);
        end
    end
    dist_temp = sqrt(dist_temp);
    % Nearest codeword for each frame, averaged over all frames.
    dist_val(ind) = sum(min(dist_temp,[],2))/size(dist_temp,1);
    if dist_val(ind) < dist_min
        dist_min = dist_val(ind);
        spkr = ind;
    end
end
% The original message literal was missing its closing quote, which made the
% whole script unparsable; it now also reports which entry was selected.
if spkr > 0
    msg = sprintf('The Speaker is found: %d', spkr);
else
    msg = 'No speaker could be matched';
end
disp(msg);
Hi, I have code and a PDF for feature extraction using MFCC for speaker recognition. I will attach them; please check them and use them if they are helpful.
Code:AI=analoginput('winsound',1); addchannel(AI,1); set(AI,'samplerate',16000); set(AI,'samplespertrigger',2*16000); set(AI,'triggertype','immediate'); start(AI); data=getdata(AI); fs=16000; %file = sprintf('%s%d.wav','database_new kar',i); %%wavwrite(data,16000,file); %% Computing MFCC Co-efficients.. %% (1) Frame Blocking.. N = 256; % N point FFT M = 100; % Overlapping NN = floor(N/2+1); %N/2 nbFrames = ceil((length(data)-N)/M); Frames = zeros(nbFrames+1,N); for i = 0:nbFrames-1 temp = data(i*M+1:i*M+N); Frames(i+1,1:N) = temp; end % Last Frame.. temp = zeros(1,N); lastLength = length(data)- nbFrames*M; temp(1:lastLength) = data(nbFrames*M+1:(nbFrames*M +1 + lastLength-1)); Frames(nbFrames+1, 1:N) = temp; %% (2) Windowing.. frameSize = size(Frames); nbFrames = frameSize(1); nbSamples = frameSize(2); % Hamming window.. w = hamming(nbSamples); Windows = zeros(nbFrames,nbSamples); for i = 1:nbFrames temp = Frames(i,1:nbSamples); Windows(i, 1:nbSamples) = w'.*temp; end %% (3) Fourier Transform.. ffts = fft(Windows'); %% (4) Mel-frequency Wrapping.. % (a) Calculate Power spectrum.. PowSpecs = abs(ffts).^2; PowSpecs = PowSpecs(1:NN-1,:); % (b) Mel filter generation nof_c = 20; % Number of channels.. df = fs/N; Nmax = N/2; fmax = fs/2; % Convert to mel scale.. melmax = 2595*log10(1+fmax/700); melinc = melmax/(nof_c+1); melcenters = (1:nof_c).*melinc; % Convert to frequency scale.. fcenters = 700*((10.^(melcenters./2595))-1); centerf = round(fcenters./df); startf = [1,centerf(1:nof_c-1)]; stopf = [centerf(2:nof_c),Nmax]; W = zeros(nof_c,Nmax); % Making filter.. for i = 1:nof_c increment = 1.0/(centerf(i)-startf(i)); for j = startf(i):centerf(i) W(i,j) = (j-startf(i))*increment; end decrement = 1.0/(stopf(i)-centerf(i)); for j = centerf(i):stopf(i) W(i,j) = (j-centerf(i))*decrement; end end % Normalising.. for i = 1:nof_c W(i,:) = W(i,:)/sum(W(i,:)); end % (c) Apply mel filters to Power spectrum coeffs.. melPowSpecs = W*PowSpecs; % (d) MFCC calculations.. 
melCeps = dct(log(melPowSpecs)); melCeps(1,:) = []; load CodeBook dist_min = inf; spkr = 0; for ind = 1:length(codebook) single_cb = codebook{ind}; [M1, N1] = size(melCeps); [M2, N2] = size(single_cb); dist_temp = zeros(N1,N2); if N1<N2 repli = zeros(1,N2); for n1 = 1:N1 dist_temp(n1,:) = sum((melCeps(:,n1+repli) - single_cb).^2,1); end else repli = zeros(1,N1); for n2 = 1:N2 dist_temp(:,n2) = sum((melCeps - single_cb(:,n2+repli)).^2,1); end end dist_temp = sqrt(dist_temp); dist_val(ind) = sum(min(dist_temp,[],2))/size(dist_temp,1); if dist_val(ind) < dist_min dist_min = dist_val(ind); spkr = ind; end end msg = sprintf('The Speaker is found); disp(msg);
pdf:
Hi, I have code and a PDF for feature extraction using MFCC for speaker recognition. I will attach them; please check them and use them if they are helpful.
Code:AI=analoginput('winsound',1); addchannel(AI,1); set(AI,'samplerate',16000); set(AI,'samplespertrigger',2*16000); set(AI,'triggertype','immediate'); start(AI); data=getdata(AI); fs=16000; %file = sprintf('%s%d.wav','database_new kar',i); %%wavwrite(data,16000,file); %% Computing MFCC Co-efficients.. %% (1) Frame Blocking.. N = 256; % N point FFT M = 100; % Overlapping NN = floor(N/2+1); %N/2 nbFrames = ceil((length(data)-N)/M); Frames = zeros(nbFrames+1,N); for i = 0:nbFrames-1 temp = data(i*M+1:i*M+N); Frames(i+1,1:N) = temp; end % Last Frame.. temp = zeros(1,N); lastLength = length(data)- nbFrames*M; temp(1:lastLength) = data(nbFrames*M+1:(nbFrames*M +1 + lastLength-1)); Frames(nbFrames+1, 1:N) = temp; %% (2) Windowing.. frameSize = size(Frames); nbFrames = frameSize(1); nbSamples = frameSize(2); % Hamming window.. w = hamming(nbSamples); Windows = zeros(nbFrames,nbSamples); for i = 1:nbFrames temp = Frames(i,1:nbSamples); Windows(i, 1:nbSamples) = w'.*temp; end %% (3) Fourier Transform.. ffts = fft(Windows'); %% (4) Mel-frequency Wrapping.. % (a) Calculate Power spectrum.. PowSpecs = abs(ffts).^2; PowSpecs = PowSpecs(1:NN-1,:); % (b) Mel filter generation nof_c = 20; % Number of channels.. df = fs/N; Nmax = N/2; fmax = fs/2; % Convert to mel scale.. melmax = 2595*log10(1+fmax/700); melinc = melmax/(nof_c+1); melcenters = (1:nof_c).*melinc; % Convert to frequency scale.. fcenters = 700*((10.^(melcenters./2595))-1); centerf = round(fcenters./df); startf = [1,centerf(1:nof_c-1)]; stopf = [centerf(2:nof_c),Nmax]; W = zeros(nof_c,Nmax); % Making filter.. for i = 1:nof_c increment = 1.0/(centerf(i)-startf(i)); for j = startf(i):centerf(i) W(i,j) = (j-startf(i))*increment; end decrement = 1.0/(stopf(i)-centerf(i)); for j = centerf(i):stopf(i) W(i,j) = (j-centerf(i))*decrement; end end % Normalising.. for i = 1:nof_c W(i,:) = W(i,:)/sum(W(i,:)); end % (c) Apply mel filters to Power spectrum coeffs.. melPowSpecs = W*PowSpecs; % (d) MFCC calculations.. 
melCeps = dct(log(melPowSpecs)); melCeps(1,:) = []; load CodeBook dist_min = inf; spkr = 0; for ind = 1:length(codebook) single_cb = codebook{ind}; [M1, N1] = size(melCeps); [M2, N2] = size(single_cb); dist_temp = zeros(N1,N2); if N1<N2 repli = zeros(1,N2); for n1 = 1:N1 dist_temp(n1,:) = sum((melCeps(:,n1+repli) - single_cb).^2,1); end else repli = zeros(1,N1); for n2 = 1:N2 dist_temp(:,n2) = sum((melCeps - single_cb(:,n2+repli)).^2,1); end end dist_temp = sqrt(dist_temp); dist_val(ind) = sum(min(dist_temp,[],2))/size(dist_temp,1); if dist_val(ind) < dist_min dist_min = dist_val(ind); spkr = ind; end end msg = sprintf('The Speaker is found); disp(msg);
pdf:
This section connects a recording device, for example a microphone, to the MATLAB environment. The analoginput function selects the input device and creates the device object; addchannel then adds a hardware channel to it. The triggering properties are then configured using the set function. What is this part for, sir? Please help me... :smile:
AI=analoginput('winsound',1);
addchannel(AI,1);
set(AI,'samplerate',16000);
set(AI,'samplespertrigger',2*16000);
set(AI,'triggertype','immediate');
start(AI);
Hi, I have code and a PDF for feature extraction using MFCC for speaker recognition. I will attach them; please check them and use them if they are helpful.
Code:AI=analoginput('winsound',1); addchannel(AI,1); set(AI,'samplerate',16000); set(AI,'samplespertrigger',2*16000); set(AI,'triggertype','immediate'); start(AI); data=getdata(AI); fs=16000; %file = sprintf('%s%d.wav','database_new kar',i); %%wavwrite(data,16000,file); %% Computing MFCC Co-efficients.. %% (1) Frame Blocking.. N = 256; % N point FFT M = 100; % Overlapping NN = floor(N/2+1); %N/2 nbFrames = ceil((length(data)-N)/M); Frames = zeros(nbFrames+1,N); for i = 0:nbFrames-1 temp = data(i*M+1:i*M+N); Frames(i+1,1:N) = temp; end % Last Frame.. temp = zeros(1,N); lastLength = length(data)- nbFrames*M; temp(1:lastLength) = data(nbFrames*M+1:(nbFrames*M +1 + lastLength-1)); Frames(nbFrames+1, 1:N) = temp; %% (2) Windowing.. frameSize = size(Frames); nbFrames = frameSize(1); nbSamples = frameSize(2); % Hamming window.. w = hamming(nbSamples); Windows = zeros(nbFrames,nbSamples); for i = 1:nbFrames temp = Frames(i,1:nbSamples); Windows(i, 1:nbSamples) = w'.*temp; end %% (3) Fourier Transform.. ffts = fft(Windows'); %% (4) Mel-frequency Wrapping.. % (a) Calculate Power spectrum.. PowSpecs = abs(ffts).^2; PowSpecs = PowSpecs(1:NN-1,:); % (b) Mel filter generation nof_c = 20; % Number of channels.. df = fs/N; Nmax = N/2; fmax = fs/2; % Convert to mel scale.. melmax = 2595*log10(1+fmax/700); melinc = melmax/(nof_c+1); melcenters = (1:nof_c).*melinc; % Convert to frequency scale.. fcenters = 700*((10.^(melcenters./2595))-1); centerf = round(fcenters./df); startf = [1,centerf(1:nof_c-1)]; stopf = [centerf(2:nof_c),Nmax]; W = zeros(nof_c,Nmax); % Making filter.. for i = 1:nof_c increment = 1.0/(centerf(i)-startf(i)); for j = startf(i):centerf(i) W(i,j) = (j-startf(i))*increment; end decrement = 1.0/(stopf(i)-centerf(i)); for j = centerf(i):stopf(i) W(i,j) = (j-centerf(i))*decrement; end end % Normalising.. for i = 1:nof_c W(i,:) = W(i,:)/sum(W(i,:)); end % (c) Apply mel filters to Power spectrum coeffs.. melPowSpecs = W*PowSpecs; % (d) MFCC calculations.. 
melCeps = dct(log(melPowSpecs)); melCeps(1,:) = []; load CodeBook dist_min = inf; spkr = 0; for ind = 1:length(codebook) single_cb = codebook{ind}; [M1, N1] = size(melCeps); [M2, N2] = size(single_cb); dist_temp = zeros(N1,N2); if N1<N2 repli = zeros(1,N2); for n1 = 1:N1 dist_temp(n1,:) = sum((melCeps(:,n1+repli) - single_cb).^2,1); end else repli = zeros(1,N1); for n2 = 1:N2 dist_temp(:,n2) = sum((melCeps - single_cb(:,n2+repli)).^2,1); end end dist_temp = sqrt(dist_temp); dist_val(ind) = sum(min(dist_temp,[],2))/size(dist_temp,1); if dist_val(ind) < dist_min dist_min = dist_val(ind); spkr = ind; end end msg = sprintf('The Speaker is found); disp(msg);
pdf:
Hi sir,
I tried running your code, but it produces "cut-off" triangular filters (normalized; image: MFCCfilters (Sir's Code)) instead of the triangular filter output (image only) that I took from an internet source (normalized; bottom image of TriFilterBank (From other source)). Could you explain this to me? Thanks. =)
Actually, the code written above is for voice recognition and not for image recognition; that may be the cause of your problem.
Hi, I have code and a PDF for feature extraction using MFCC for speaker recognition. I will attach them; please check them and use them if they are helpful.
Code:AI=analoginput('winsound',1); addchannel(AI,1); set(AI,'samplerate',16000); set(AI,'samplespertrigger',2*16000); set(AI,'triggertype','immediate'); start(AI); data=getdata(AI); fs=16000; %file = sprintf('%s%d.wav','database_new kar',i); %%wavwrite(data,16000,file); %% Computing MFCC Co-efficients.. %% (1) Frame Blocking.. N = 256; % N point FFT M = 100; % Overlapping NN = floor(N/2+1); %N/2 nbFrames = ceil((length(data)-N)/M); Frames = zeros(nbFrames+1,N); for i = 0:nbFrames-1 temp = data(i*M+1:i*M+N); Frames(i+1,1:N) = temp; end % Last Frame.. temp = zeros(1,N); lastLength = length(data)- nbFrames*M; temp(1:lastLength) = data(nbFrames*M+1:(nbFrames*M +1 + lastLength-1)); Frames(nbFrames+1, 1:N) = temp; %% (2) Windowing.. frameSize = size(Frames); nbFrames = frameSize(1); nbSamples = frameSize(2); % Hamming window.. w = hamming(nbSamples); Windows = zeros(nbFrames,nbSamples); for i = 1:nbFrames temp = Frames(i,1:nbSamples); Windows(i, 1:nbSamples) = w'.*temp; end %% (3) Fourier Transform.. ffts = fft(Windows'); %% (4) Mel-frequency Wrapping.. % (a) Calculate Power spectrum.. PowSpecs = abs(ffts).^2; PowSpecs = PowSpecs(1:NN-1,:); % (b) Mel filter generation nof_c = 20; % Number of channels.. df = fs/N; Nmax = N/2; fmax = fs/2; % Convert to mel scale.. melmax = 2595*log10(1+fmax/700); melinc = melmax/(nof_c+1); melcenters = (1:nof_c).*melinc; % Convert to frequency scale.. fcenters = 700*((10.^(melcenters./2595))-1); centerf = round(fcenters./df); startf = [1,centerf(1:nof_c-1)]; stopf = [centerf(2:nof_c),Nmax]; W = zeros(nof_c,Nmax); % Making filter.. for i = 1:nof_c increment = 1.0/(centerf(i)-startf(i)); for j = startf(i):centerf(i) W(i,j) = (j-startf(i))*increment; end decrement = 1.0/(stopf(i)-centerf(i)); for j = centerf(i):stopf(i) W(i,j) = (j-centerf(i))*decrement; end end % Normalising.. for i = 1:nof_c W(i,:) = W(i,:)/sum(W(i,:)); end % (c) Apply mel filters to Power spectrum coeffs.. melPowSpecs = W*PowSpecs; % (d) MFCC calculations.. 
melCeps = dct(log(melPowSpecs)); melCeps(1,:) = []; load CodeBook dist_min = inf; spkr = 0; for ind = 1:length(codebook) single_cb = codebook{ind}; [M1, N1] = size(melCeps); [M2, N2] = size(single_cb); dist_temp = zeros(N1,N2); if N1<N2 repli = zeros(1,N2); for n1 = 1:N1 dist_temp(n1,:) = sum((melCeps(:,n1+repli) - single_cb).^2,1); end else repli = zeros(1,N1); for n2 = 1:N2 dist_temp(:,n2) = sum((melCeps - single_cb(:,n2+repli)).^2,1); end end dist_temp = sqrt(dist_temp); dist_val(ind) = sum(min(dist_temp,[],2))/size(dist_temp,1); if dist_val(ind) < dist_min dist_min = dist_val(ind); spkr = ind; end end msg = sprintf('The Speaker is found); disp(msg);
pdf:
Actually, the code written above is for voice recognition and not for image recognition; that may be the cause of your problem.
Hi, I have code and a PDF for feature extraction using MFCC for speaker recognition. I will attach them; please check them and use them if they are helpful.
Code:AI=analoginput('winsound',1); addchannel(AI,1); set(AI,'samplerate',16000); set(AI,'samplespertrigger',2*16000); set(AI,'triggertype','immediate'); start(AI); data=getdata(AI); fs=16000; %file = sprintf('%s%d.wav','database_new kar',i); %%wavwrite(data,16000,file); %% Computing MFCC Co-efficients.. %% (1) Frame Blocking.. N = 256; % N point FFT M = 100; % Overlapping NN = floor(N/2+1); %N/2 nbFrames = ceil((length(data)-N)/M); Frames = zeros(nbFrames+1,N); for i = 0:nbFrames-1 temp = data(i*M+1:i*M+N); Frames(i+1,1:N) = temp; end % Last Frame.. temp = zeros(1,N); lastLength = length(data)- nbFrames*M; temp(1:lastLength) = data(nbFrames*M+1:(nbFrames*M +1 + lastLength-1)); Frames(nbFrames+1, 1:N) = temp; %% (2) Windowing.. frameSize = size(Frames); nbFrames = frameSize(1); nbSamples = frameSize(2); % Hamming window.. w = hamming(nbSamples); Windows = zeros(nbFrames,nbSamples); for i = 1:nbFrames temp = Frames(i,1:nbSamples); Windows(i, 1:nbSamples) = w'.*temp; end %% (3) Fourier Transform.. ffts = fft(Windows'); %% (4) Mel-frequency Wrapping.. % (a) Calculate Power spectrum.. PowSpecs = abs(ffts).^2; PowSpecs = PowSpecs(1:NN-1,:); % (b) Mel filter generation nof_c = 20; % Number of channels.. df = fs/N; Nmax = N/2; fmax = fs/2; % Convert to mel scale.. melmax = 2595*log10(1+fmax/700); melinc = melmax/(nof_c+1); melcenters = (1:nof_c).*melinc; % Convert to frequency scale.. fcenters = 700*((10.^(melcenters./2595))-1); centerf = round(fcenters./df); startf = [1,centerf(1:nof_c-1)]; stopf = [centerf(2:nof_c),Nmax]; W = zeros(nof_c,Nmax); % Making filter.. for i = 1:nof_c increment = 1.0/(centerf(i)-startf(i)); for j = startf(i):centerf(i) W(i,j) = (j-startf(i))*increment; end decrement = 1.0/(stopf(i)-centerf(i)); for j = centerf(i):stopf(i) W(i,j) = (j-centerf(i))*decrement; end end % Normalising.. for i = 1:nof_c W(i,:) = W(i,:)/sum(W(i,:)); end % (c) Apply mel filters to Power spectrum coeffs.. melPowSpecs = W*PowSpecs; % (d) MFCC calculations.. 
melCeps = dct(log(melPowSpecs)); melCeps(1,:) = []; load CodeBook dist_min = inf; spkr = 0; for ind = 1:length(codebook) single_cb = codebook{ind}; [M1, N1] = size(melCeps); [M2, N2] = size(single_cb); dist_temp = zeros(N1,N2); if N1<N2 repli = zeros(1,N2); for n1 = 1:N1 dist_temp(n1,:) = sum((melCeps(:,n1+repli) - single_cb).^2,1); end else repli = zeros(1,N1); for n2 = 1:N2 dist_temp(:,n2) = sum((melCeps - single_cb(:,n2+repli)).^2,1); end end dist_temp = sqrt(dist_temp); dist_val(ind) = sum(min(dist_temp,[],2))/size(dist_temp,1); if dist_val(ind) < dist_min dist_min = dist_val(ind); spkr = ind; end end msg = sprintf('The Speaker is found); disp(msg);
pdf: