Continue to Site

Welcome to EDAboard.com

Welcome to our site! EDAboard.com is an international Electronics Discussion Forum focused on EDA software, circuits, schematics, books, theory, papers, asic, pld, 8051, DSP, Network, RF, Analog Design, PCB, Service Manuals... and a whole lot more! To participate you need to register. Registration is free. Click here to register now.

Need MATLAB code for feature extraction using MFCC

Status
Not open for further replies.

kareem dsp

Junior Member level 1
Junior Member level 1
Joined
Oct 24, 2012
Messages
18
Helped
3
Reputation
6
Reaction score
3
Trophy points
1,283
Visit site
Activity points
1,419
Hi all,
I need MATLAB code for feature extraction using MFCC, so that I can use these features in dialect recognition.
 

Hi, I have code and a PDF for feature extraction using MFCC for speaker recognition. I will attach them; please check them and use them if they are helpful.

Code:
%% Speaker identification from a 2-second microphone recording.
% Pipeline: record audio -> split into overlapping frames -> Hamming window
% -> FFT power spectrum -> 20-channel mel filter bank -> log + DCT (MFCC)
% -> nearest-codebook match against the cell array loaded from CodeBook.
%
% NOTE(review): analoginput/addchannel/getdata belong to the legacy Data
% Acquisition Toolbox interface - confirm they exist in the MATLAB release
% being used.
AI = analoginput('winsound',1);
addchannel(AI,1);
set(AI,'samplerate',16000);
set(AI,'samplespertrigger',2*16000);   % 32000 samples = 2 s at 16 kHz
set(AI,'triggertype','immediate');
start(AI);
data = getdata(AI);
fs = 16000;                            % must match the 'samplerate' set above
%file = sprintf('%s%d.wav','database_new kar',i);
%%wavwrite(data,16000,file);

%% Computing MFCC Co-efficients..
    %% (1) Frame Blocking..
    N = 256;   % Frame length (also the FFT size)
    M = 100;   % Frame shift (hop); consecutive frames overlap by N-M samples

    NN = floor(N/2+1);   % N/2+1; rows 1..NN-1 of the spectrum are kept below
    nbFrames = ceil((length(data)-N)/M);
    Frames = zeros(nbFrames+1,N);
    for i = 0:nbFrames-1
        Frames(i+1,1:N) = data(i*M+1:i*M+N);
    end

    % Last frame: whatever samples remain, zero-padded to N.
    temp = zeros(1,N);
    lastLength = length(data) - nbFrames*M;
    temp(1:lastLength) = data(nbFrames*M+1:nbFrames*M+lastLength);
    Frames(nbFrames+1,1:N) = temp;

    %% (2) Windowing..
    frameSize = size(Frames);
    nbFrames  = frameSize(1);   % now includes the zero-padded last frame
    nbSamples = frameSize(2);

    % Hamming window applied to every frame (one frame per row).
    w = hamming(nbSamples);
    Windows = zeros(nbFrames,nbSamples);
    for i = 1:nbFrames
        Windows(i,1:nbSamples) = w'.*Frames(i,1:nbSamples);
    end

    %% (3) Fourier Transform..
    % Transpose so each frame is a column; fft then works column-wise.
    ffts = fft(Windows');

    %% (4) Mel-frequency Wrapping..
    % (a) Calculate Power spectrum (first N/2 rows only)..
    PowSpecs = abs(ffts).^2;
    PowSpecs = PowSpecs(1:NN-1,:);

    % (b) Mel filter generation
    nof_c = 20;   % Number of channels..
    df = fs/N;    % Frequency spacing between FFT bins
    Nmax = N/2;
    fmax = fs/2;

    % Convert to mel scale..
    melmax = 2595*log10(1+fmax/700);
    melinc = melmax/(nof_c+1);
    melcenters = (1:nof_c).*melinc;

    % Convert to frequency scale..
    fcenters = 700*((10.^(melcenters./2595))-1);

    % NOTE(review): round(fcenters./df) is a 0-based FFT bin number but it is
    % used directly as a 1-based column of W / row of PowSpecs, so each filter
    % appears to sit one bin low. Left unchanged here - confirm before
    % shifting, because it changes the resulting features.
    centerf = round(fcenters./df);

    % Each filter starts at the previous centre and stops at the next one.
    startf = [1,centerf(1:nof_c-1)];
    stopf  = [centerf(2:nof_c),Nmax];

    W = zeros(nof_c,Nmax);

    % Making filter: triangle with 0 at startf, 1 at centerf, 0 at stopf.
    for i = 1:nof_c
        % Rising edge. Skipped when the edge has zero width (the first
        % filter has startf == centerf), which previously divided by zero.
        riseWidth = centerf(i)-startf(i);
        if riseWidth > 0
            increment = 1.0/riseWidth;
            for j = startf(i):centerf(i)
                W(i,j) = (j-startf(i))*increment;
            end
        end

        % Falling edge: must go from 1 at the centre down to 0 at stopf.
        % (Using (j-centerf)*decrement here produced a rising ramp and zeroed
        % the peak, giving "cut-off" sawtooth filters.)
        fallWidth = stopf(i)-centerf(i);
        if fallWidth > 0
            decrement = 1.0/fallWidth;
            for j = centerf(i):stopf(i)
                W(i,j) = 1.0 - (j-centerf(i))*decrement;
            end
        else
            W(i,centerf(i)) = 1.0;
        end
    end

    % Normalising so every filter's weights sum to 1..
    for i = 1:nof_c
        W(i,:) = W(i,:)/sum(W(i,:));
    end

    % (c) Apply mel filters to Power spectrum coeffs..
    melPowSpecs = W*PowSpecs;

    % (d) MFCC calculations: DCT of the log filter-bank energies, with the
    % first coefficient row discarded.
    melCeps = dct(log(melPowSpecs));
    melCeps(1,:) = [];

    %% (5) Match against the stored codebooks..
    % CodeBook must be a MAT-file on the path that defines the cell array
    % 'codebook' (one matrix of code vectors, stored as columns, per speaker).
    % It is not created by this script. Codebooks have to be built with the
    % same filter bank as above, otherwise the distances are meaningless.
    load CodeBook
    dist_min = inf;
    spkr = 0;
    dist_val = zeros(1,length(codebook));   % mean distortion per codebook
    N1 = size(melCeps,2);                   % number of feature vectors
    for ind = 1:length(codebook)
        single_cb = codebook{ind};
        N2 = size(single_cb,2);             % number of code vectors

        % dist_temp(a,b) = squared Euclidean distance between feature
        % vector a and code vector b. Loop over the smaller dimension;
        % indexing with (k + zeros) replicates column k.
        dist_temp = zeros(N1,N2);
        if N1 < N2
            repli = zeros(1,N2);
            for n1 = 1:N1
                dist_temp(n1,:) = sum((melCeps(:,n1+repli) - single_cb).^2,1);
            end
        else
            repli = zeros(1,N1);
            for n2 = 1:N2
                dist_temp(:,n2) = sum((melCeps - single_cb(:,n2+repli)).^2,1);
            end
        end
        dist_temp = sqrt(dist_temp);

        % Average, over all feature vectors, of the distance to the
        % nearest code vector.
        dist_val(ind) = sum(min(dist_temp,[],2))/size(dist_temp,1);
        if dist_val(ind) < dist_min
            dist_min = dist_val(ind);
            spkr = ind;   % index of the best-matching codebook so far
        end
    end
    msg = sprintf('The Speaker is found');
    disp(msg);

pdf:
 

Attachments

  • Speaker Recognition - Using MFCC.pdf
    368.8 KB · Views: 601
Hi i have a code and pdf for feature extraction using MFCC for speaker recognition. I will attach that please check it and use it if helpful.

Code:
%% Speaker identification from a 2-second microphone recording.
% Pipeline: record audio -> split into overlapping frames -> Hamming window
% -> FFT power spectrum -> 20-channel mel filter bank -> log + DCT (MFCC)
% -> nearest-codebook match against the cell array loaded from CodeBook.
%
% NOTE(review): analoginput/addchannel/getdata belong to the legacy Data
% Acquisition Toolbox interface - confirm they exist in the MATLAB release
% being used.
AI = analoginput('winsound',1);
addchannel(AI,1);
set(AI,'samplerate',16000);
set(AI,'samplespertrigger',2*16000);   % 32000 samples = 2 s at 16 kHz
set(AI,'triggertype','immediate');
start(AI);
data = getdata(AI);
fs = 16000;                            % must match the 'samplerate' set above
%file = sprintf('%s%d.wav','database_new kar',i);
%%wavwrite(data,16000,file);

%% Computing MFCC Co-efficients..
    %% (1) Frame Blocking..
    N = 256;   % Frame length (also the FFT size)
    M = 100;   % Frame shift (hop); consecutive frames overlap by N-M samples

    NN = floor(N/2+1);   % N/2+1; rows 1..NN-1 of the spectrum are kept below
    nbFrames = ceil((length(data)-N)/M);
    Frames = zeros(nbFrames+1,N);
    for i = 0:nbFrames-1
        Frames(i+1,1:N) = data(i*M+1:i*M+N);
    end

    % Last frame: whatever samples remain, zero-padded to N.
    temp = zeros(1,N);
    lastLength = length(data) - nbFrames*M;
    temp(1:lastLength) = data(nbFrames*M+1:nbFrames*M+lastLength);
    Frames(nbFrames+1,1:N) = temp;

    %% (2) Windowing..
    frameSize = size(Frames);
    nbFrames  = frameSize(1);   % now includes the zero-padded last frame
    nbSamples = frameSize(2);

    % Hamming window applied to every frame (one frame per row).
    w = hamming(nbSamples);
    Windows = zeros(nbFrames,nbSamples);
    for i = 1:nbFrames
        Windows(i,1:nbSamples) = w'.*Frames(i,1:nbSamples);
    end

    %% (3) Fourier Transform..
    % Transpose so each frame is a column; fft then works column-wise.
    ffts = fft(Windows');

    %% (4) Mel-frequency Wrapping..
    % (a) Calculate Power spectrum (first N/2 rows only)..
    PowSpecs = abs(ffts).^2;
    PowSpecs = PowSpecs(1:NN-1,:);

    % (b) Mel filter generation
    nof_c = 20;   % Number of channels..
    df = fs/N;    % Frequency spacing between FFT bins
    Nmax = N/2;
    fmax = fs/2;

    % Convert to mel scale..
    melmax = 2595*log10(1+fmax/700);
    melinc = melmax/(nof_c+1);
    melcenters = (1:nof_c).*melinc;

    % Convert to frequency scale..
    fcenters = 700*((10.^(melcenters./2595))-1);

    % NOTE(review): round(fcenters./df) is a 0-based FFT bin number but it is
    % used directly as a 1-based column of W / row of PowSpecs, so each filter
    % appears to sit one bin low. Left unchanged here - confirm before
    % shifting, because it changes the resulting features.
    centerf = round(fcenters./df);

    % Each filter starts at the previous centre and stops at the next one.
    startf = [1,centerf(1:nof_c-1)];
    stopf  = [centerf(2:nof_c),Nmax];

    W = zeros(nof_c,Nmax);

    % Making filter: triangle with 0 at startf, 1 at centerf, 0 at stopf.
    for i = 1:nof_c
        % Rising edge. Skipped when the edge has zero width (the first
        % filter has startf == centerf), which previously divided by zero.
        riseWidth = centerf(i)-startf(i);
        if riseWidth > 0
            increment = 1.0/riseWidth;
            for j = startf(i):centerf(i)
                W(i,j) = (j-startf(i))*increment;
            end
        end

        % Falling edge: must go from 1 at the centre down to 0 at stopf.
        % (Using (j-centerf)*decrement here produced a rising ramp and zeroed
        % the peak, giving "cut-off" sawtooth filters.)
        fallWidth = stopf(i)-centerf(i);
        if fallWidth > 0
            decrement = 1.0/fallWidth;
            for j = centerf(i):stopf(i)
                W(i,j) = 1.0 - (j-centerf(i))*decrement;
            end
        else
            W(i,centerf(i)) = 1.0;
        end
    end

    % Normalising so every filter's weights sum to 1..
    for i = 1:nof_c
        W(i,:) = W(i,:)/sum(W(i,:));
    end

    % (c) Apply mel filters to Power spectrum coeffs..
    melPowSpecs = W*PowSpecs;

    % (d) MFCC calculations: DCT of the log filter-bank energies, with the
    % first coefficient row discarded.
    melCeps = dct(log(melPowSpecs));
    melCeps(1,:) = [];

    %% (5) Match against the stored codebooks..
    % CodeBook must be a MAT-file on the path that defines the cell array
    % 'codebook' (one matrix of code vectors, stored as columns, per speaker).
    % It is not created by this script. Codebooks have to be built with the
    % same filter bank as above, otherwise the distances are meaningless.
    load CodeBook
    dist_min = inf;
    spkr = 0;
    dist_val = zeros(1,length(codebook));   % mean distortion per codebook
    N1 = size(melCeps,2);                   % number of feature vectors
    for ind = 1:length(codebook)
        single_cb = codebook{ind};
        N2 = size(single_cb,2);             % number of code vectors

        % dist_temp(a,b) = squared Euclidean distance between feature
        % vector a and code vector b. Loop over the smaller dimension;
        % indexing with (k + zeros) replicates column k.
        dist_temp = zeros(N1,N2);
        if N1 < N2
            repli = zeros(1,N2);
            for n1 = 1:N1
                dist_temp(n1,:) = sum((melCeps(:,n1+repli) - single_cb).^2,1);
            end
        else
            repli = zeros(1,N1);
            for n2 = 1:N2
                dist_temp(:,n2) = sum((melCeps - single_cb(:,n2+repli)).^2,1);
            end
        end
        dist_temp = sqrt(dist_temp);

        % Average, over all feature vectors, of the distance to the
        % nearest code vector.
        dist_val(ind) = sum(min(dist_temp,[],2))/size(dist_temp,1);
        if dist_val(ind) < dist_min
            dist_min = dist_val(ind);
            spkr = ind;   % index of the best-matching codebook so far
        end
    end
    msg = sprintf('The Speaker is found');
    disp(msg);

pdf:


Thank you, sir, for your interest, but I can't see the PDF file.
 
Can't you see the link there? I am attaching it here again; please check. In the code I have commented the portion that computes the MFCC coefficients, and I think that will be helpful for you. I know it is complex code, so just take the portion you need.

pdfView attachment Speaker Recognition - Using MFCC.pdf
 
Hi i have a code and pdf for feature extraction using MFCC for speaker recognition. I will attach that please check it and use it if helpful.

Code:
%% Speaker identification from a 2-second microphone recording.
% Pipeline: record audio -> split into overlapping frames -> Hamming window
% -> FFT power spectrum -> 20-channel mel filter bank -> log + DCT (MFCC)
% -> nearest-codebook match against the cell array loaded from CodeBook.
%
% NOTE(review): analoginput/addchannel/getdata belong to the legacy Data
% Acquisition Toolbox interface - confirm they exist in the MATLAB release
% being used.
AI = analoginput('winsound',1);
addchannel(AI,1);
set(AI,'samplerate',16000);
set(AI,'samplespertrigger',2*16000);   % 32000 samples = 2 s at 16 kHz
set(AI,'triggertype','immediate');
start(AI);
data = getdata(AI);
fs = 16000;                            % must match the 'samplerate' set above
%file = sprintf('%s%d.wav','database_new kar',i);
%%wavwrite(data,16000,file);

%% Computing MFCC Co-efficients..
    %% (1) Frame Blocking..
    N = 256;   % Frame length (also the FFT size)
    M = 100;   % Frame shift (hop); consecutive frames overlap by N-M samples

    NN = floor(N/2+1);   % N/2+1; rows 1..NN-1 of the spectrum are kept below
    nbFrames = ceil((length(data)-N)/M);
    Frames = zeros(nbFrames+1,N);
    for i = 0:nbFrames-1
        Frames(i+1,1:N) = data(i*M+1:i*M+N);
    end

    % Last frame: whatever samples remain, zero-padded to N.
    temp = zeros(1,N);
    lastLength = length(data) - nbFrames*M;
    temp(1:lastLength) = data(nbFrames*M+1:nbFrames*M+lastLength);
    Frames(nbFrames+1,1:N) = temp;

    %% (2) Windowing..
    frameSize = size(Frames);
    nbFrames  = frameSize(1);   % now includes the zero-padded last frame
    nbSamples = frameSize(2);

    % Hamming window applied to every frame (one frame per row).
    w = hamming(nbSamples);
    Windows = zeros(nbFrames,nbSamples);
    for i = 1:nbFrames
        Windows(i,1:nbSamples) = w'.*Frames(i,1:nbSamples);
    end

    %% (3) Fourier Transform..
    % Transpose so each frame is a column; fft then works column-wise.
    ffts = fft(Windows');

    %% (4) Mel-frequency Wrapping..
    % (a) Calculate Power spectrum (first N/2 rows only)..
    PowSpecs = abs(ffts).^2;
    PowSpecs = PowSpecs(1:NN-1,:);

    % (b) Mel filter generation
    nof_c = 20;   % Number of channels..
    df = fs/N;    % Frequency spacing between FFT bins
    Nmax = N/2;
    fmax = fs/2;

    % Convert to mel scale..
    melmax = 2595*log10(1+fmax/700);
    melinc = melmax/(nof_c+1);
    melcenters = (1:nof_c).*melinc;

    % Convert to frequency scale..
    fcenters = 700*((10.^(melcenters./2595))-1);

    % NOTE(review): round(fcenters./df) is a 0-based FFT bin number but it is
    % used directly as a 1-based column of W / row of PowSpecs, so each filter
    % appears to sit one bin low. Left unchanged here - confirm before
    % shifting, because it changes the resulting features.
    centerf = round(fcenters./df);

    % Each filter starts at the previous centre and stops at the next one.
    startf = [1,centerf(1:nof_c-1)];
    stopf  = [centerf(2:nof_c),Nmax];

    W = zeros(nof_c,Nmax);

    % Making filter: triangle with 0 at startf, 1 at centerf, 0 at stopf.
    for i = 1:nof_c
        % Rising edge. Skipped when the edge has zero width (the first
        % filter has startf == centerf), which previously divided by zero.
        riseWidth = centerf(i)-startf(i);
        if riseWidth > 0
            increment = 1.0/riseWidth;
            for j = startf(i):centerf(i)
                W(i,j) = (j-startf(i))*increment;
            end
        end

        % Falling edge: must go from 1 at the centre down to 0 at stopf.
        % (Using (j-centerf)*decrement here produced a rising ramp and zeroed
        % the peak, giving "cut-off" sawtooth filters.)
        fallWidth = stopf(i)-centerf(i);
        if fallWidth > 0
            decrement = 1.0/fallWidth;
            for j = centerf(i):stopf(i)
                W(i,j) = 1.0 - (j-centerf(i))*decrement;
            end
        else
            W(i,centerf(i)) = 1.0;
        end
    end

    % Normalising so every filter's weights sum to 1..
    for i = 1:nof_c
        W(i,:) = W(i,:)/sum(W(i,:));
    end

    % (c) Apply mel filters to Power spectrum coeffs..
    melPowSpecs = W*PowSpecs;

    % (d) MFCC calculations: DCT of the log filter-bank energies, with the
    % first coefficient row discarded.
    melCeps = dct(log(melPowSpecs));
    melCeps(1,:) = [];

    %% (5) Match against the stored codebooks..
    % CodeBook must be a MAT-file on the path that defines the cell array
    % 'codebook' (one matrix of code vectors, stored as columns, per speaker).
    % It is not created by this script. Codebooks have to be built with the
    % same filter bank as above, otherwise the distances are meaningless.
    load CodeBook
    dist_min = inf;
    spkr = 0;
    dist_val = zeros(1,length(codebook));   % mean distortion per codebook
    N1 = size(melCeps,2);                   % number of feature vectors
    for ind = 1:length(codebook)
        single_cb = codebook{ind};
        N2 = size(single_cb,2);             % number of code vectors

        % dist_temp(a,b) = squared Euclidean distance between feature
        % vector a and code vector b. Loop over the smaller dimension;
        % indexing with (k + zeros) replicates column k.
        dist_temp = zeros(N1,N2);
        if N1 < N2
            repli = zeros(1,N2);
            for n1 = 1:N1
                dist_temp(n1,:) = sum((melCeps(:,n1+repli) - single_cb).^2,1);
            end
        else
            repli = zeros(1,N1);
            for n2 = 1:N2
                dist_temp(:,n2) = sum((melCeps - single_cb(:,n2+repli)).^2,1);
            end
        end
        dist_temp = sqrt(dist_temp);

        % Average, over all feature vectors, of the distance to the
        % nearest code vector.
        dist_val(ind) = sum(min(dist_temp,[],2))/size(dist_temp,1);
        if dist_val(ind) < dist_min
            dist_min = dist_val(ind);
            spkr = ind;   % index of the best-matching codebook so far
        end
    end
    msg = sprintf('The Speaker is found');
    disp(msg);

pdf:

this part is for what sir? please help me...:smile:
% Audio-capture setup: configures an input object AI and starts acquisition.
% NOTE(review): analoginput appears to be the legacy Data Acquisition Toolbox
% call; 'winsound' presumably selects the Windows sound card and 1 the
% device ID - confirm against the toolbox documentation.
AI=analoginput('winsound',1);
% Add channel 1 to the input object.
addchannel(AI,1);
% Sampling rate: 16000 samples per second.
set(AI,'samplerate',16000);
% Samples collected per trigger: 2*16000 = 32000, i.e. 2 s at 16 kHz.
set(AI,'samplespertrigger',2*16000);
% Trigger type 'immediate' - presumably acquisition begins as soon as
% start is called, without waiting for an external trigger (confirm).
set(AI,'triggertype','immediate');
% Start the input object.
start(AI);
 

this part is for what sir? please help me...:smile:
% Audio-capture setup: configures an input object AI and starts acquisition.
% NOTE(review): analoginput appears to be the legacy Data Acquisition Toolbox
% call; 'winsound' presumably selects the Windows sound card and 1 the
% device ID - confirm against the toolbox documentation.
AI=analoginput('winsound',1);
% Add channel 1 to the input object.
addchannel(AI,1);
% Sampling rate: 16000 samples per second.
set(AI,'samplerate',16000);
% Samples collected per trigger: 2*16000 = 32000, i.e. 2 s at 16 kHz.
set(AI,'samplespertrigger',2*16000);
% Trigger type 'immediate' - presumably acquisition begins as soon as
% start is called, without waiting for an external trigger (confirm).
set(AI,'triggertype','immediate');
% Start the input object.
start(AI);
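This section connects a recording device, for example a microphone, to the MATLAB environment. The analoginput function selects the input device and creates the device object for it (here the Windows sound card, 'winsound', device 1). addchannel then adds an input channel to that object. The acquisition properties (sample rate, number of samples per trigger, and trigger type) are then set using the set function, and start begins the recording.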
 

Hi i have a code and pdf for feature extraction using MFCC for speaker recognition. I will attach that please check it and use it if helpful.

Code:
%% Speaker identification from a 2-second microphone recording.
% Pipeline: record audio -> split into overlapping frames -> Hamming window
% -> FFT power spectrum -> 20-channel mel filter bank -> log + DCT (MFCC)
% -> nearest-codebook match against the cell array loaded from CodeBook.
%
% NOTE(review): analoginput/addchannel/getdata belong to the legacy Data
% Acquisition Toolbox interface - confirm they exist in the MATLAB release
% being used.
AI = analoginput('winsound',1);
addchannel(AI,1);
set(AI,'samplerate',16000);
set(AI,'samplespertrigger',2*16000);   % 32000 samples = 2 s at 16 kHz
set(AI,'triggertype','immediate');
start(AI);
data = getdata(AI);
fs = 16000;                            % must match the 'samplerate' set above
%file = sprintf('%s%d.wav','database_new kar',i);
%%wavwrite(data,16000,file);

%% Computing MFCC Co-efficients..
    %% (1) Frame Blocking..
    N = 256;   % Frame length (also the FFT size)
    M = 100;   % Frame shift (hop); consecutive frames overlap by N-M samples

    NN = floor(N/2+1);   % N/2+1; rows 1..NN-1 of the spectrum are kept below
    nbFrames = ceil((length(data)-N)/M);
    Frames = zeros(nbFrames+1,N);
    for i = 0:nbFrames-1
        Frames(i+1,1:N) = data(i*M+1:i*M+N);
    end

    % Last frame: whatever samples remain, zero-padded to N.
    temp = zeros(1,N);
    lastLength = length(data) - nbFrames*M;
    temp(1:lastLength) = data(nbFrames*M+1:nbFrames*M+lastLength);
    Frames(nbFrames+1,1:N) = temp;

    %% (2) Windowing..
    frameSize = size(Frames);
    nbFrames  = frameSize(1);   % now includes the zero-padded last frame
    nbSamples = frameSize(2);

    % Hamming window applied to every frame (one frame per row).
    w = hamming(nbSamples);
    Windows = zeros(nbFrames,nbSamples);
    for i = 1:nbFrames
        Windows(i,1:nbSamples) = w'.*Frames(i,1:nbSamples);
    end

    %% (3) Fourier Transform..
    % Transpose so each frame is a column; fft then works column-wise.
    ffts = fft(Windows');

    %% (4) Mel-frequency Wrapping..
    % (a) Calculate Power spectrum (first N/2 rows only)..
    PowSpecs = abs(ffts).^2;
    PowSpecs = PowSpecs(1:NN-1,:);

    % (b) Mel filter generation
    nof_c = 20;   % Number of channels..
    df = fs/N;    % Frequency spacing between FFT bins
    Nmax = N/2;
    fmax = fs/2;

    % Convert to mel scale..
    melmax = 2595*log10(1+fmax/700);
    melinc = melmax/(nof_c+1);
    melcenters = (1:nof_c).*melinc;

    % Convert to frequency scale..
    fcenters = 700*((10.^(melcenters./2595))-1);

    % NOTE(review): round(fcenters./df) is a 0-based FFT bin number but it is
    % used directly as a 1-based column of W / row of PowSpecs, so each filter
    % appears to sit one bin low. Left unchanged here - confirm before
    % shifting, because it changes the resulting features.
    centerf = round(fcenters./df);

    % Each filter starts at the previous centre and stops at the next one.
    startf = [1,centerf(1:nof_c-1)];
    stopf  = [centerf(2:nof_c),Nmax];

    W = zeros(nof_c,Nmax);

    % Making filter: triangle with 0 at startf, 1 at centerf, 0 at stopf.
    for i = 1:nof_c
        % Rising edge. Skipped when the edge has zero width (the first
        % filter has startf == centerf), which previously divided by zero.
        riseWidth = centerf(i)-startf(i);
        if riseWidth > 0
            increment = 1.0/riseWidth;
            for j = startf(i):centerf(i)
                W(i,j) = (j-startf(i))*increment;
            end
        end

        % Falling edge: must go from 1 at the centre down to 0 at stopf.
        % (Using (j-centerf)*decrement here produced a rising ramp and zeroed
        % the peak, giving "cut-off" sawtooth filters.)
        fallWidth = stopf(i)-centerf(i);
        if fallWidth > 0
            decrement = 1.0/fallWidth;
            for j = centerf(i):stopf(i)
                W(i,j) = 1.0 - (j-centerf(i))*decrement;
            end
        else
            W(i,centerf(i)) = 1.0;
        end
    end

    % Normalising so every filter's weights sum to 1..
    for i = 1:nof_c
        W(i,:) = W(i,:)/sum(W(i,:));
    end

    % (c) Apply mel filters to Power spectrum coeffs..
    melPowSpecs = W*PowSpecs;

    % (d) MFCC calculations: DCT of the log filter-bank energies, with the
    % first coefficient row discarded.
    melCeps = dct(log(melPowSpecs));
    melCeps(1,:) = [];

    %% (5) Match against the stored codebooks..
    % CodeBook must be a MAT-file on the path that defines the cell array
    % 'codebook' (one matrix of code vectors, stored as columns, per speaker).
    % It is not created by this script. Codebooks have to be built with the
    % same filter bank as above, otherwise the distances are meaningless.
    load CodeBook
    dist_min = inf;
    spkr = 0;
    dist_val = zeros(1,length(codebook));   % mean distortion per codebook
    N1 = size(melCeps,2);                   % number of feature vectors
    for ind = 1:length(codebook)
        single_cb = codebook{ind};
        N2 = size(single_cb,2);             % number of code vectors

        % dist_temp(a,b) = squared Euclidean distance between feature
        % vector a and code vector b. Loop over the smaller dimension;
        % indexing with (k + zeros) replicates column k.
        dist_temp = zeros(N1,N2);
        if N1 < N2
            repli = zeros(1,N2);
            for n1 = 1:N1
                dist_temp(n1,:) = sum((melCeps(:,n1+repli) - single_cb).^2,1);
            end
        else
            repli = zeros(1,N1);
            for n2 = 1:N2
                dist_temp(:,n2) = sum((melCeps - single_cb(:,n2+repli)).^2,1);
            end
        end
        dist_temp = sqrt(dist_temp);

        % Average, over all feature vectors, of the distance to the
        % nearest code vector.
        dist_val(ind) = sum(min(dist_temp,[],2))/size(dist_temp,1);
        if dist_val(ind) < dist_min
            dist_min = dist_val(ind);
            spkr = ind;   % index of the best-matching codebook so far
        end
    end
    msg = sprintf('The Speaker is found');
    disp(msg);

pdf:

Hi sir,

I tried running your code, but the mel filters it produces look like "cut-off" triangles (normalized; see the attached image "MFCCfilters(Sir's Code)") instead of the full triangular filter bank that I obtained from another internet source (normalized; bottom plot of the attached image "TriFilterBank(From other source)"). Could you explain this to me? Thanks. =)
 

Attachments

  • MFCCfilters(Sir's Code).bmp
    230.7 KB · Views: 298
  • TriFilterBank(From other source).png
    TriFilterBank(From other source).png
    20.3 KB · Views: 249
Hi sir,

I tried running your code, but the mel filters it produces look like "cut-off" triangles (normalized; see the attached image "MFCCfilters(Sir's Code)") instead of the full triangular filter bank that I obtained from another internet source (normalized; bottom plot of the attached image "TriFilterBank(From other source)"). Could you explain this to me? Thanks. =)

Actually the code written above is for voice recognition and not for image recognition that may be the problem with you
 

Actually the code written above is for voice recognition and not for image recognition that may be the problem with you

No sir, you misunderstood what i meant. The images that i mentioned and attached are the MFCC filter graph plotted. Not related to image recognition at all. I tried your code to plot out the triangular filters but it seems some "cut-off" triangular filters there...
 
sir, if this is the code for feature extraction using mfcc, please could you give me the code for feature extraction using fft (i have to input the wav signal using wavread)
please could you help me.
 

Hi i have a code and pdf for feature extraction using MFCC for speaker recognition. I will attach that please check it and use it if helpful.

Code:
%% Speaker identification from a 2-second microphone recording.
% Pipeline: record audio -> split into overlapping frames -> Hamming window
% -> FFT power spectrum -> 20-channel mel filter bank -> log + DCT (MFCC)
% -> nearest-codebook match against the cell array loaded from CodeBook.
%
% NOTE(review): analoginput/addchannel/getdata belong to the legacy Data
% Acquisition Toolbox interface - confirm they exist in the MATLAB release
% being used.
AI = analoginput('winsound',1);
addchannel(AI,1);
set(AI,'samplerate',16000);
set(AI,'samplespertrigger',2*16000);   % 32000 samples = 2 s at 16 kHz
set(AI,'triggertype','immediate');
start(AI);
data = getdata(AI);
fs = 16000;                            % must match the 'samplerate' set above
%file = sprintf('%s%d.wav','database_new kar',i);
%%wavwrite(data,16000,file);

%% Computing MFCC Co-efficients..
    %% (1) Frame Blocking..
    N = 256;   % Frame length (also the FFT size)
    M = 100;   % Frame shift (hop); consecutive frames overlap by N-M samples

    NN = floor(N/2+1);   % N/2+1; rows 1..NN-1 of the spectrum are kept below
    nbFrames = ceil((length(data)-N)/M);
    Frames = zeros(nbFrames+1,N);
    for i = 0:nbFrames-1
        Frames(i+1,1:N) = data(i*M+1:i*M+N);
    end

    % Last frame: whatever samples remain, zero-padded to N.
    temp = zeros(1,N);
    lastLength = length(data) - nbFrames*M;
    temp(1:lastLength) = data(nbFrames*M+1:nbFrames*M+lastLength);
    Frames(nbFrames+1,1:N) = temp;

    %% (2) Windowing..
    frameSize = size(Frames);
    nbFrames  = frameSize(1);   % now includes the zero-padded last frame
    nbSamples = frameSize(2);

    % Hamming window applied to every frame (one frame per row).
    w = hamming(nbSamples);
    Windows = zeros(nbFrames,nbSamples);
    for i = 1:nbFrames
        Windows(i,1:nbSamples) = w'.*Frames(i,1:nbSamples);
    end

    %% (3) Fourier Transform..
    % Transpose so each frame is a column; fft then works column-wise.
    ffts = fft(Windows');

    %% (4) Mel-frequency Wrapping..
    % (a) Calculate Power spectrum (first N/2 rows only)..
    PowSpecs = abs(ffts).^2;
    PowSpecs = PowSpecs(1:NN-1,:);

    % (b) Mel filter generation
    nof_c = 20;   % Number of channels..
    df = fs/N;    % Frequency spacing between FFT bins
    Nmax = N/2;
    fmax = fs/2;

    % Convert to mel scale..
    melmax = 2595*log10(1+fmax/700);
    melinc = melmax/(nof_c+1);
    melcenters = (1:nof_c).*melinc;

    % Convert to frequency scale..
    fcenters = 700*((10.^(melcenters./2595))-1);

    % NOTE(review): round(fcenters./df) is a 0-based FFT bin number but it is
    % used directly as a 1-based column of W / row of PowSpecs, so each filter
    % appears to sit one bin low. Left unchanged here - confirm before
    % shifting, because it changes the resulting features.
    centerf = round(fcenters./df);

    % Each filter starts at the previous centre and stops at the next one.
    startf = [1,centerf(1:nof_c-1)];
    stopf  = [centerf(2:nof_c),Nmax];

    W = zeros(nof_c,Nmax);

    % Making filter: triangle with 0 at startf, 1 at centerf, 0 at stopf.
    for i = 1:nof_c
        % Rising edge. Skipped when the edge has zero width (the first
        % filter has startf == centerf), which previously divided by zero.
        riseWidth = centerf(i)-startf(i);
        if riseWidth > 0
            increment = 1.0/riseWidth;
            for j = startf(i):centerf(i)
                W(i,j) = (j-startf(i))*increment;
            end
        end

        % Falling edge: must go from 1 at the centre down to 0 at stopf.
        % (Using (j-centerf)*decrement here produced a rising ramp and zeroed
        % the peak, giving "cut-off" sawtooth filters.)
        fallWidth = stopf(i)-centerf(i);
        if fallWidth > 0
            decrement = 1.0/fallWidth;
            for j = centerf(i):stopf(i)
                W(i,j) = 1.0 - (j-centerf(i))*decrement;
            end
        else
            W(i,centerf(i)) = 1.0;
        end
    end

    % Normalising so every filter's weights sum to 1..
    for i = 1:nof_c
        W(i,:) = W(i,:)/sum(W(i,:));
    end

    % (c) Apply mel filters to Power spectrum coeffs..
    melPowSpecs = W*PowSpecs;

    % (d) MFCC calculations: DCT of the log filter-bank energies, with the
    % first coefficient row discarded.
    melCeps = dct(log(melPowSpecs));
    melCeps(1,:) = [];

    %% (5) Match against the stored codebooks..
    % CodeBook must be a MAT-file on the path that defines the cell array
    % 'codebook' (one matrix of code vectors, stored as columns, per speaker).
    % It is not created by this script. Codebooks have to be built with the
    % same filter bank as above, otherwise the distances are meaningless.
    load CodeBook
    dist_min = inf;
    spkr = 0;
    dist_val = zeros(1,length(codebook));   % mean distortion per codebook
    N1 = size(melCeps,2);                   % number of feature vectors
    for ind = 1:length(codebook)
        single_cb = codebook{ind};
        N2 = size(single_cb,2);             % number of code vectors

        % dist_temp(a,b) = squared Euclidean distance between feature
        % vector a and code vector b. Loop over the smaller dimension;
        % indexing with (k + zeros) replicates column k.
        dist_temp = zeros(N1,N2);
        if N1 < N2
            repli = zeros(1,N2);
            for n1 = 1:N1
                dist_temp(n1,:) = sum((melCeps(:,n1+repli) - single_cb).^2,1);
            end
        else
            repli = zeros(1,N1);
            for n2 = 1:N2
                dist_temp(:,n2) = sum((melCeps - single_cb(:,n2+repli)).^2,1);
            end
        end
        dist_temp = sqrt(dist_temp);

        % Average, over all feature vectors, of the distance to the
        % nearest code vector.
        dist_val(ind) = sum(min(dist_temp,[],2))/size(dist_temp,1);
        if dist_val(ind) < dist_min
            dist_min = dist_val(ind);
            spkr = ind;   % index of the best-matching codebook so far
        end
    end
    msg = sprintf('The Speaker is found');
    disp(msg);

pdf:

Thank you sir for this code. I am also working of automatic speaker recognition with MFCC. Will you please share the complete code us for speaker recognition.??
 

Actually the code written above is for voice recognition and not for image recognition that may be the problem with you

APHNASEEM:

Sir, what is the codebook ("CodeBook" in the code)? Is it a file or a function? Could you please share it?
 

    V

    Points: 2
    Helpful Answer Positive Rating
hello sir i tried this code but can't understand ...... this codebook.......... and also this is not working
please tell me
 

I am sorry, sir: the above code is not mine. There was a mistake and I uploaded a different one, which in fact I did not write; I downloaded it from an internet site.
 

Is there any difference between the feature-extraction code using MFCC for speaker recognition and for speech recognition?

- - - Updated - - -

Hi i have a code and pdf for feature extraction using MFCC for speaker recognition. I will attach that please check it and use it if helpful.

Code:
%% Speaker identification from a 2-second microphone recording.
% Pipeline: record audio -> split into overlapping frames -> Hamming window
% -> FFT power spectrum -> 20-channel mel filter bank -> log + DCT (MFCC)
% -> nearest-codebook match against the cell array loaded from CodeBook.
%
% NOTE(review): analoginput/addchannel/getdata belong to the legacy Data
% Acquisition Toolbox interface - confirm they exist in the MATLAB release
% being used.
AI = analoginput('winsound',1);
addchannel(AI,1);
set(AI,'samplerate',16000);
set(AI,'samplespertrigger',2*16000);   % 32000 samples = 2 s at 16 kHz
set(AI,'triggertype','immediate');
start(AI);
data = getdata(AI);
fs = 16000;                            % must match the 'samplerate' set above
%file = sprintf('%s%d.wav','database_new kar',i);
%%wavwrite(data,16000,file);

%% Computing MFCC Co-efficients..
    %% (1) Frame Blocking..
    N = 256;   % Frame length (also the FFT size)
    M = 100;   % Frame shift (hop); consecutive frames overlap by N-M samples

    NN = floor(N/2+1);   % N/2+1; rows 1..NN-1 of the spectrum are kept below
    nbFrames = ceil((length(data)-N)/M);
    Frames = zeros(nbFrames+1,N);
    for i = 0:nbFrames-1
        Frames(i+1,1:N) = data(i*M+1:i*M+N);
    end

    % Last frame: whatever samples remain, zero-padded to N.
    temp = zeros(1,N);
    lastLength = length(data) - nbFrames*M;
    temp(1:lastLength) = data(nbFrames*M+1:nbFrames*M+lastLength);
    Frames(nbFrames+1,1:N) = temp;

    %% (2) Windowing..
    frameSize = size(Frames);
    nbFrames  = frameSize(1);   % now includes the zero-padded last frame
    nbSamples = frameSize(2);

    % Hamming window applied to every frame (one frame per row).
    w = hamming(nbSamples);
    Windows = zeros(nbFrames,nbSamples);
    for i = 1:nbFrames
        Windows(i,1:nbSamples) = w'.*Frames(i,1:nbSamples);
    end

    %% (3) Fourier Transform..
    % Transpose so each frame is a column; fft then works column-wise.
    ffts = fft(Windows');

    %% (4) Mel-frequency Wrapping..
    % (a) Calculate Power spectrum (first N/2 rows only)..
    PowSpecs = abs(ffts).^2;
    PowSpecs = PowSpecs(1:NN-1,:);

    % (b) Mel filter generation
    nof_c = 20;   % Number of channels..
    df = fs/N;    % Frequency spacing between FFT bins
    Nmax = N/2;
    fmax = fs/2;

    % Convert to mel scale..
    melmax = 2595*log10(1+fmax/700);
    melinc = melmax/(nof_c+1);
    melcenters = (1:nof_c).*melinc;

    % Convert to frequency scale..
    fcenters = 700*((10.^(melcenters./2595))-1);

    % NOTE(review): round(fcenters./df) is a 0-based FFT bin number but it is
    % used directly as a 1-based column of W / row of PowSpecs, so each filter
    % appears to sit one bin low. Left unchanged here - confirm before
    % shifting, because it changes the resulting features.
    centerf = round(fcenters./df);

    % Each filter starts at the previous centre and stops at the next one.
    startf = [1,centerf(1:nof_c-1)];
    stopf  = [centerf(2:nof_c),Nmax];

    W = zeros(nof_c,Nmax);

    % Making filter: triangle with 0 at startf, 1 at centerf, 0 at stopf.
    for i = 1:nof_c
        % Rising edge. Skipped when the edge has zero width (the first
        % filter has startf == centerf), which previously divided by zero.
        riseWidth = centerf(i)-startf(i);
        if riseWidth > 0
            increment = 1.0/riseWidth;
            for j = startf(i):centerf(i)
                W(i,j) = (j-startf(i))*increment;
            end
        end

        % Falling edge: must go from 1 at the centre down to 0 at stopf.
        % (Using (j-centerf)*decrement here produced a rising ramp and zeroed
        % the peak, giving "cut-off" sawtooth filters.)
        fallWidth = stopf(i)-centerf(i);
        if fallWidth > 0
            decrement = 1.0/fallWidth;
            for j = centerf(i):stopf(i)
                W(i,j) = 1.0 - (j-centerf(i))*decrement;
            end
        else
            W(i,centerf(i)) = 1.0;
        end
    end

    % Normalising so every filter's weights sum to 1..
    for i = 1:nof_c
        W(i,:) = W(i,:)/sum(W(i,:));
    end

    % (c) Apply mel filters to Power spectrum coeffs..
    melPowSpecs = W*PowSpecs;

    % (d) MFCC calculations: DCT of the log filter-bank energies, with the
    % first coefficient row discarded.
    melCeps = dct(log(melPowSpecs));
    melCeps(1,:) = [];

    %% (5) Match against the stored codebooks..
    % CodeBook must be a MAT-file on the path that defines the cell array
    % 'codebook' (one matrix of code vectors, stored as columns, per speaker).
    % It is not created by this script. Codebooks have to be built with the
    % same filter bank as above, otherwise the distances are meaningless.
    load CodeBook
    dist_min = inf;
    spkr = 0;
    dist_val = zeros(1,length(codebook));   % mean distortion per codebook
    N1 = size(melCeps,2);                   % number of feature vectors
    for ind = 1:length(codebook)
        single_cb = codebook{ind};
        N2 = size(single_cb,2);             % number of code vectors

        % dist_temp(a,b) = squared Euclidean distance between feature
        % vector a and code vector b. Loop over the smaller dimension;
        % indexing with (k + zeros) replicates column k.
        dist_temp = zeros(N1,N2);
        if N1 < N2
            repli = zeros(1,N2);
            for n1 = 1:N1
                dist_temp(n1,:) = sum((melCeps(:,n1+repli) - single_cb).^2,1);
            end
        else
            repli = zeros(1,N1);
            for n2 = 1:N2
                dist_temp(:,n2) = sum((melCeps - single_cb(:,n2+repli)).^2,1);
            end
        end
        dist_temp = sqrt(dist_temp);

        % Average, over all feature vectors, of the distance to the
        % nearest code vector.
        dist_val(ind) = sum(min(dist_temp,[],2))/size(dist_temp,1);
        if dist_val(ind) < dist_min
            dist_min = dist_val(ind);
            spkr = ind;   % index of the best-matching codebook so far
        end
    end
    msg = sprintf('The Speaker is found');
    disp(msg);

pdf:

Is there any difference between the feature-extraction code using MFCC for speaker recognition and for speech recognition?
 

hi can u help me how you created codebook in speech recognition
 

hello sir!
I tried to execute your code, but there is an error when loading the codebook!
Can you please help me solve the problem? :(
 

Status
Not open for further replies.

Part and Inventory Search

Welcome to EDABoard.com

Sponsor

Back
Top