// This is the top module file.
// Simulation time unit is 1 ns with 10 ps precision.
`timescale 1ns / 10ps
// Build-time configuration macros used by the NLMS module in this file.
`define DATA_LEN 1024 // Number of data samples; terminal value compared against K_counter
//`define DATA_LEN 2048 // Length of Data Samples
`define COEFF_LEN 2 // Number of filter coefficients; J_counter runs from 0 to COEFF_LEN-1
`define COEFF_MEM_WIDTH 10 // Coefficient memory address width in bits
`define DATA_MEM_WIDTH 12 // Data memory address width in bits
//`include "div_gen_v2_0.v"
// NLMS adaptive FIR filter controller.
//
// A single clocked FSM walks over `DATA_LEN samples. For each sample index
// K_counter it:
//   S1_2    : (only when K_counter >= `COEFF_LEN) runs the power sub-FSM, which
//             squares two input samples and accumulates them into Acc; Acc
//             drives the divisor port of the divider instance.
//   S2      : issues the read of x[k] from the REF_ptr region of the data RAM.
//   S3..S8  : per tap j, reads xorg[k-j] and h[j], forms y_hat = h[j]*xorg,
//             accumulates it with saturation and writes the coefficient back.
//   S9      : stores y[k] in the RESULT_ptr region, computes
//             error = x[k] - y[k] and advances K_counter.
//   S10     : clears BUSY and stops the simulation.
//
// Ports:
//   RST      - synchronous, active-high reset; holds the FSM in S1.
//   CLK      - clock; everything is sampled on the rising edge.
//   BUSY     - set to 1 in S1, cleared to 0 in S10.
//   Y_Bar    - registered copy of the filter output y[k].
//   X_Bar    - registered copy of x[k] read from the REF_ptr region.
//   Xorg_Bar - registered copy of the sample read for tap 0.
//   ERROR    - current value of the error register.
//
// Depends on modules defined outside this file: multiplier, div_gen_v2_1,
// COEFF_SARAM and DATA_DARAM.
module NLMS(
    RST,
    CLK,
    BUSY,
    Y_Bar,
    X_Bar,
    Xorg_Bar,
    ERROR
);
    input         RST;       // Reset
    input         CLK;       // Clock
    output        BUSY;      // Busy signal, 1 while NLMS is being performed
    output [15:0] Y_Bar;     // output to the test bench
    output [15:0] X_Bar;     // output to the test bench
    output [15:0] Xorg_Bar;  // output to the test bench
    output [15:0] ERROR;     // error

    // Main FSM state encoding.
    parameter [3:0]
        S1      = 4'b0000,
        S1_2    = 4'b1000,
        S2      = 4'b0001,
        S3      = 4'b0011,
        S4      = 4'b0010,
        S5      = 4'b0110,
        s5_temp = 4'b1110,
        S6      = 4'b0111,
        S7      = 4'b0101,
        S8      = 4'b0100,
        S9      = 4'b1100,
        S10     = 4'b1101;

    // Power-calculation sub-FSM state encoding (used inside S1_2).
    parameter [2:0]
        Init       = 3'b000,
        Fetch      = 3'b001,
        Wait       = 3'b011,
        Read       = 3'b010,
        Square     = 3'b110,
        Accumulate = 3'b111,
        Divide     = 3'b101,
        Next       = 3'b100;

    // Base addresses of the memory regions.
    parameter [`COEFF_MEM_WIDTH-1:0] COEFF_ptr  = 10'h000; // Coefficient address pointer
    parameter [`DATA_MEM_WIDTH-1:0]  DATA_ptr   = 12'h000; // Input data address pointer
    parameter [`DATA_MEM_WIDTH-1:0]  REF_ptr    = 12'h400; // Reference echo data address pointer
    parameter [`DATA_MEM_WIDTH-1:0]  RESULT_ptr = 12'h800; // Result address pointer

    reg        Overflow;      // Overflow bit (only cleared in S1)
    reg        BUSY;          // Busy
    reg [15:0] K_counter;     // Index of the sample being processed
    reg [15:0] J_counter;     // Index of the filter coefficient being processed
    reg [15:0] K_val;         // Sample index used by the power estimation
    reg [`COEFF_MEM_WIDTH-1:0] Coeff_address;
    reg [`DATA_MEM_WIDTH-1:0]  Data_address;
    reg [`DATA_MEM_WIDTH-1:0]  Refrence_address;
    reg [`DATA_MEM_WIDTH-1:0]  Result_address;
    reg [33:0] mue;           // mu multiplied by error (never driven; debug print only)
    reg [33:0] ex;            // error multiplied by xorg_k
    reg [15:0] mu;            // step size
    reg [15:0] error;         // error = echo - estimated echo
    reg [15:0] error_reg;
    reg [31:0] Power;         // only cleared in S1; no longer doubles as sub-FSM state
    reg [39:0] Power_ACC;     // Power accumulator
    reg [15:0] x_k;           // echo
    reg [16:0] xorg_k;        // original signal, sign-extended to 17 bits
    reg [15:0] y_k;           // estimated echo
    reg [16:0] h_j;           // coefficient from memory, sign-extended to 17 bits
    reg [33:0] h_new;         // coefficient correction term
    reg [16:0] h_new_acc;     // updated coefficient before saturation
    reg [33:0] y_hat;         // single-tap product
    reg [31:0] y_hat_acc;     // estimated output accumulator
    reg [31:0] y_hat_acc_P;   // accumulator candidate before saturation
    reg [3:0]  state;         // main FSM state
    reg [2:0]  power;         // power sub-FSM state
    reg        read_flag;     // 1 during the first tap pass after S2

    reg  [9:0]  C_WRA_0;      // Coeff write/read address
    reg  [15:0] C_WD_0;       // Coeff write data
    reg         C_R_en0;      // Coeff read enable
    reg         C_W_en0;      // Coeff write enable
    wire [15:0] C_DOUT_0;     // Coefficient memory output port

    reg  [11:0] D_WRA_0;      // Data write/read address, port 0
    reg  [11:0] D_RA_1;       // Data read address, port 1
    reg  [15:0] D_WD_0;       // Data memory write data
    reg         D_R_en0;      // Data memory read enable, port 0
    reg         D_R_en1;      // Data memory read enable, port 1
    reg         D_W_en0;      // Data memory write enable, port 0
    wire [15:0] D_DOUT_0;     // Data out from RAM port 0
    wire [15:0] D_DOUT_1;     // Data out from RAM port 1

    wire        RFD;          // divider rfd output
    reg  [1:0]  DIVI;         // value driven onto the divider's dividend port
    wire [23:0] FRAC;         // divider fractional output
    wire [1:0]  QUO;          // divider quotient output

    reg [15:0] x_k_Bar;       // registered outputs
    reg [15:0] y_Bar;
    reg [15:0] xorg_k_Bar;

    reg [15:0] sq1;           // operands squared by multipliers 4 and 5
    reg [15:0] sq2;
    reg [31:0] Square1;       // registered squares
    reg [31:0] Square2;
    reg [23:0] Acc;           // sum of squares; feeds the divider

    wire [16:0] a1;           // multiplier 1 ports
    wire [16:0] b1;
    wire [33:0] p1;
    wire [16:0] a2;           // multiplier 2 ports
    wire [16:0] b2;
    wire [33:0] p2;
    wire [16:0] a3;           // multiplier 3 ports (multiplier not instantiated)
    wire [16:0] b3;
    reg  [33:0] p3;           // never driven; appears in debug prints only
    wire [16:0] a4;           // multiplier 4 ports
    wire [16:0] b4;
    wire [33:0] p4;
    wire [16:0] a5;           // multiplier 5 ports
    wire [16:0] b5;
    wire [33:0] p5;
    reg  [33:0] p2_temp;

    assign ERROR    = error;
    assign Y_Bar    = y_Bar;
    assign X_Bar    = x_k_Bar;
    assign Xorg_Bar = xorg_k_Bar;

    // Multiplier operand wiring; 16-bit values are sign-extended to 17 bits.
    assign a1 = h_j;                      // p1 = h_j * xorg_k
    assign b1 = xorg_k;
    assign a2 = {error[15], error[15:0]}; // p2 = error * xorg_k
    assign b2 = xorg_k;
    assign a4 = {sq1[15], sq1[15:0]};     // p4 = sq1 * sq1
    assign b4 = {sq1[15], sq1[15:0]};
    assign a5 = {sq2[15], sq2[15:0]};     // p5 = sq2 * sq2
    assign b5 = {sq2[15], sq2[15:0]};

    // 17 x 17 multipliers; no clock is connected to them.
    multiplier m1(
        .a(a1),
        .b(b1),
        .p(p1)
    );

    multiplier m2(
        .a(a2),
        .b(b2),
        .p(p2)
    );

    multiplier m4(
        .a(a4),
        .b(b4),
        .p(p4)
    );

    multiplier m5(
        .a(a5),
        .b(b5),
        .p(p5)
    );

    // Divider: dividend = DIVI, divisor = Acc with its two LSBs forced to 1
    // (so the divisor can never be zero).
    div_gen_v2_1 div(
        .rfd(RFD), // was the undeclared, implicitly created net `rfd`
        .clk(CLK),
        .dividend(DIVI),
        .quotient(QUO),
        .divisor({Acc[23:2], 2'b11}),
        .fractional(FRAC)
    );

    // Coefficient RAM instance.
    COEFF_SARAM c1(
        .RST(RST),
        .CLK(CLK),
        .RF_WRA_0(C_WRA_0),
        .RF_WD_0(C_WD_0),
        .RF_R_en0(C_R_en0),
        .RF_W_en0(C_W_en0),
        .RF_DOUT_0(C_DOUT_0)
    );

    // Data RAM instance.
    DATA_DARAM d1(
        .RST(RST),
        .CLK(CLK),
        .RF_WRA_0(D_WRA_0),
        .RF_RA_1(D_RA_1),
        .RF_WD_0(D_WD_0),
        .RF_R_en0(D_R_en0),
        .RF_R_en1(D_R_en1),
        .RF_W_en0(D_W_en0),
        .RF_DOUT_0(D_DOUT_0),
        .RF_DOUT_1(D_DOUT_1)
    );

    // Main state machine.
    always @(posedge CLK) begin : START_STATE_MACHINE_LOGIC
        if (RST) begin
            // Reset must win: the case statement is skipped entirely so that no
            // later non-blocking assignment to `state` can override it.
            state <= S1;
        end
        else begin
            case (state)
                S1: begin // initialization of all variables
                    ex               <= 34'h0;
                    // Step size .25. Other values tried: 16'h0dd (.0035),
                    // 16'h400 (.03125), 16'h4000 (.5), 16'h100 (.0039).
                    mu               <= 16'h2000;
                    error            <= 16'h0000;
                    Power            <= 32'h00000000;
                    power            <= Init;
                    Power_ACC        <= 40'h0000000000;
                    x_k              <= 16'h0000;
                    xorg_k           <= 17'h00000;
                    y_k              <= 16'h0000;
                    h_j              <= 17'h00000;
                    sq1              <= 16'h0000;
                    sq2              <= 16'h0000;
                    Square1          <= 32'h00000000;
                    Square2          <= 32'h00000000;
                    y_hat            <= 34'h0;
                    y_hat_acc        <= 32'h00000000;
                    y_hat_acc_P      <= 32'h00000000;
                    h_new            <= 34'h0;
                    h_new_acc        <= 17'h00000;
                    Acc              <= 24'h000000;
                    BUSY             <= 1'b1;
                    Overflow         <= 1'b0;
                    K_counter        <= 16'h0000;
                    J_counter        <= 16'h0000;
                    K_val            <= 16'h0000;
                    Coeff_address    <= 10'h000;
                    Data_address     <= 12'h000;
                    Refrence_address <= 12'h000;
                    Result_address   <= 12'h000;
                    read_flag        <= 1'b0;
                    DIVI             <= 2'b01;
                    // Start with every RAM enable de-asserted so the memories
                    // never see X on their control inputs.
                    D_R_en0          <= 1'b0;
                    D_R_en1          <= 1'b0;
                    D_W_en0          <= 1'b0;
                    C_R_en0          <= 1'b0;
                    C_W_en0          <= 1'b0;
                    state            <= S1_2;
                    $display($time, "State 1 complete.");
                end

                S1_2: begin
                    $display($time, "State 12 complete. error = %h",error);
                    $display($time, "State TEMP complete. y_hat_acc = %h", y_hat_acc);
                    // Power estimation runs only once at least COEFF_LEN
                    // samples have been consumed.
                    if (K_counter >= `COEFF_LEN) begin
                        case (power)
                            Init: begin // latch the current sample index
                                K_val   <= K_counter;
                                D_W_en0 <= 1'b0;
                                power   <= Fetch;
                            end

                            Fetch: begin // read samples K_val and K_val-1 from the DATA_ptr region
                                $display($time, "Value of Acc for K_val %h is %h.",K_val,Acc);
                                D_WRA_0 <= {DATA_ptr[11:10], K_val[9:0]};
                                // The literal is sized so the subtraction stays
                                // 10 bits wide; an unsized 1 widens it to 32 bits
                                // and pushes the pointer bits out of the 12-bit
                                // address.
                                D_RA_1  <= {DATA_ptr[11:10], (K_val[9:0] - 10'd1)};
                                D_R_en0 <= 1'b1; // enable DARAM read
                                D_R_en1 <= 1'b1; // enable DARAM read
                                power   <= Wait;
                            end

                            Wait: begin
                                power   <= Read;
                                K_val   <= K_val - 2;
                                D_R_en0 <= 1'b0; // disable DARAM read
                                D_R_en1 <= 1'b0; // disable DARAM read
                            end

                            Read: begin // present both fetched samples to the squaring multipliers
                                sq1   <= D_DOUT_0;
                                sq2   <= D_DOUT_1;
                                power <= Square;
                            end

                            Square: begin // store the products
                                Square1[30:0] <= p4[30:0];
                                Square2[30:0] <= p5[30:0];
                                power         <= Accumulate;
                            end

                            Accumulate: begin // accumulate the products
                                $display($time, "Square of sq1 = %h is Square1 %h.",sq1, Square1[30:15]);
                                $display($time, "Square of sq2 = %h is Square2 %h.",sq2, Square2[30:15]);
                                Acc <= Acc + Square1[30:15] + Square2[30:15];
                                if (K_val <= (K_counter - `COEFF_LEN))
                                    power <= Divide;
                                else
                                    power <= Fetch;
                            end

                            Divide: begin
                                power <= Init;
                                state <= S2;
                            end

                            // Covers the unused `Next` encoding and X values.
                            default: begin
                                power <= Init;
                            end
                        endcase
                    end
                    else begin
                        state <= S2;
                    end
                end

                S2: begin // request x[k] and clear the per-sample accumulators
                    $display($time, "Inverse of Acc = %h is FRAC %h x_k = %h.",Acc, {4'h0,FRAC[7:0],4'h0},x_k);
                    D_WRA_0     <= {REF_ptr[11:10], K_counter[9:0]}; // get x[k]......the echo
                    D_R_en0     <= 1'b1; // enable DARAM read
                    D_W_en0     <= 1'b0; // disable DARAM write
                    Acc         <= 24'h000000;
                    y_hat_acc   <= 32'h00000000;
                    y_hat_acc_P <= 32'h00000000;
                    read_flag   <= 1'b1;
                    state       <= S3;
                end

                S3: begin // request xorg[k-j] and h[j]
                    $display($time, "State 2 complete mue = %h, D_W_en0 = %h, D_WRA_0 = %h and D_R_en0 = %h x_k = %h a1= %h b1= %h p1 = %h a2= %h b2= %h p2 = %h.",mue, D_W_en0,D_WRA_0,D_R_en0,x_k,a1,b1,p1,a2,b2,p2);
                    D_WRA_0 <= {DATA_ptr[11:10], (K_counter[9:0] - J_counter[9:0])}; // get xorg[k-j]
                    C_WRA_0 <= J_counter[9:0]; // get h[j]
                    C_W_en0 <= 1'b0;           // disable coeff SARAM write update
                    D_R_en0 <= 1'b1;
                    C_R_en0 <= 1'b1;           // read new coefficient in next clock
                    state   <= S4;
                end

                S4: begin // on the first tap pass, capture x[k] from data port 0
                    $display($time, "State DEBUG mu = %h, p3= %h.",mu,p3);
                    $display($time, "State 3 complete D_WRA_0 = %h, C_WRA_0 = %h and C_R_en0 = %h x_k = %h a1= %h b1= %h p1 = %h a2= %h b2= %h p2 = %h.",D_WRA_0, C_WRA_0, C_R_en0,x_k,a1,b1,p1,a2,b2,p2);
                    if (read_flag) begin
                        x_k <= D_DOUT_0;
                    end
                    state <= S5;
                end

                S5: begin // capture xorg[k-j] and h[j]
                    $display($time, "State 4 complete x_k = %h.",x_k);
                    D_R_en0 <= 1'b0; // disable DARAM read
                    C_R_en0 <= 1'b0; // disable coeff SARAM read
                    // Sign extension for fixed point multiplication.
                    xorg_k  <= {D_DOUT_0[15], D_DOUT_0};
                    h_j     <= {C_DOUT_0[15], C_DOUT_0};
                    if (read_flag) begin
                        xorg_k_Bar <= D_DOUT_0;
                    end
                    read_flag <= 1'b0;
                    state     <= s5_temp;
                end

                s5_temp: begin
                    $display($time, "State 5 complete xorg_k = %h, h_j = %h x_k = %h a1= %h b1= %h p1 = %h a2= %h b2= %h p2 = %h.",xorg_k, h_j,x_k,a1,b1,p1,a2,b2,p2);
                    // NOTE(review): read_flag was cleared by S5 one cycle
                    // earlier, so this condition is always false here and ex
                    // keeps its S1 value of 0. The coefficient correction
                    // h_new is therefore always 0. Behaviour is left as found;
                    // confirm the intended update rule before changing it.
                    if (read_flag) begin
                        ex <= p2;
                    end
                    state <= S6;
                end

                S6: begin // register the tap product and the correction term
                    $display($time, "State 5_temp complete xorg_k = %h, ex = %h.",xorg_k, ex);
                    y_hat[30:0] <= p1[30:0]; // h_j * xorg_k
                    h_new       <= ex >> 8;
                    state       <= S7;
                end

                S7: begin // form the accumulator and coefficient candidates
                    $display($time, "State DEBUG_hnew h_new= %h.",h_new);
                    $display($time, "State 6 complete. yhat = %h y_hat_acc = %h and h_new = %h a2= %h b2= %h p2 = %h p3 =%h.",y_hat_acc,y_hat,h_new,a2,b2, p2, p3);
                    y_hat_acc_P <= {y_hat[30], y_hat[30:0]} + {y_hat_acc[30], y_hat_acc[30:0]};
                    h_new_acc   <= {h_j[15], h_j[15:0]} + {h_new[30], h_new[30:15]}; // 17 bits of new h[j]
                    state       <= S8;
                end

                S8: begin // saturate, store h[j] and advance j
                    $display($time, "State 7 complete. y_hat_acc_P = %h, h_j = %h h_new_acc=%h.",y_hat_acc_P, h_j, h_new_acc);
                    y_hat <= 34'h0;
                    h_new <= 34'h0;

                    // Coefficient saturation: two operands of the same sign
                    // producing a result of the opposite sign means overflow.
                    if ((h_j[15] == 0) && (h_new[30] == 0) && (h_new_acc[15] == 1)) begin
                        C_WD_0 <= 16'h7fff;
                        $display("Saturation in H[j] at time %t",$time);
                    end
                    else if ((h_j[15] == 1) && (h_new[30] == 1) && (h_new_acc[15] == 0)) begin
                        C_WD_0 <= 16'h8000;
                        $display("Saturation in H[j] at time %t",$time);
                    end
                    else begin
                        C_WD_0 <= h_new_acc[15:0];
                    end

                    // Coefficients are written back only once COEFF_LEN
                    // samples have been processed.
                    if (K_counter >= `COEFF_LEN) begin
                        C_W_en0 <= 1;
                    end

                    // Output accumulator saturation, with bit 30 as the sign.
                    if ((y_hat_acc[30] == 0) && (y_hat[30] == 0) && (y_hat_acc_P[30] == 1)) begin
                        y_hat_acc[30:0] <= 31'h3fffffff;
                        $display("Saturation in Y[k] accumulator at time %t",$time);
                    end
                    else if ((y_hat_acc[30] == 1) && (y_hat[30] == 1) && (y_hat_acc_P[30] == 0)) begin
                        y_hat_acc[30:0] <= 31'h40000000;
                        $display("Saturation in Y[k] accumulator at time %t",$time);
                    end
                    else begin
                        y_hat_acc <= y_hat_acc_P;
                    end

                    if (J_counter == (`COEFF_LEN - 1)) begin
                        state     <= S9;
                        J_counter <= 16'h0000;
                        $display("going to state S9 at time %t",$time);
                    end
                    else if (J_counter < (`COEFF_LEN - 1)) begin
                        state     <= S3;
                        J_counter <= J_counter + 1;
                        $display("going to state S3 , writing %h on C_WD_0 and incrementing J_Counter = %d at time %t",h_new_acc[15:0], J_counter, $time);
                    end
                end

                S9: begin // store y[k], update the error and advance k
                    $display($time, "State TEST complete. K_counter= %d x_k = %h y_hat_acc_P = %h",K_counter, x_k , y_hat_acc_P[30:15]);
                    if (K_counter == `DATA_LEN) begin
                        state <= S10;
                        $display("going to state S10 at time %t",$time);
                    end
                    else if (K_counter < `DATA_LEN) begin
                        K_counter <= K_counter + 1;
                        state     <= S1_2;
                        D_WRA_0   <= {RESULT_ptr[11:10], K_counter[9:0]};
                        D_WD_0    <= y_hat_acc[30:15];
                        if (K_counter >= `COEFF_LEN) begin
                            y_Bar <= y_hat_acc[30:15];
                            error <= x_k - y_hat_acc_P[30:15];
                        end
                        else begin
                            // For the first COEFF_LEN samples the output is
                            // forced to 0 and the error is x[k] itself.
                            error <= x_k;
                            y_Bar <= 0;
                        end
                        x_k_Bar <= x_k;
                        D_W_en0 <= 1'b1; // enable DARAM write
                        $display("Incrementing K_counter, saving Y[k]for k_counter = %d and going to state S1_2 at time %t",K_counter, $time);
                        $display($time, "State 9 complete. error = %h",error);
                    end
                end

                S10: begin // all samples processed
                    $display("end of processing",$time);
                    BUSY <= 1'b0;
                    $stop;
                end

                default: begin
                    // Recover from an illegal or X state by restarting in S1.
                    state <= S1;
                    $display($time, "Wrong state entered");
                end
            endcase
        end
    end
endmodule