// inner
//
// Consumes one 32-bit word per clock in groups of four (tracked by a free
// running 2-bit phase counter) and emits one 32-bit word per clock.
//
//   * Phases 0..2 : the incoming word is stored in hold[phase].
//   * Phase 3     : the three stored words plus the word currently on din are
//                   recombined into four new words.  Output word k takes byte
//                   lane [31:24] from input word k, [23:16] from word k+1,
//                   [15:8] from word k+2 and [7:0] from word k+3 (indices
//                   modulo 4, din acting as word 3).
//   * Every phase : dout is loaded with shuffled[phase].  In phase 3 this is
//                   the value shuffled[3] held before that edge, because all
//                   updates are non-blocking.
//
// Ports
//   clk  : clock; all state updates on the rising edge.
//   rst  : synchronous reset; clears the phase counter only.  The storage
//          arrays and dout are not reset.
//   din  : 32-bit input word.
//   dout : 32-bit registered output word.
module inner
(
    input             clk, rst,
    input      [31:0] din,
    output reg [31:0] dout
);

    reg [ 1:0] phase;                // position inside the current group of four
    reg [31:0] hold     [2:0];       // input words captured in phases 0..2
    reg [31:0] shuffled [3:0];       // recombined words waiting to be output

    // Builds one word from four sources, taking a different byte lane from
    // each: the top byte of the first argument down to the low byte of the last.
    function [31:0] pick_lanes;
        input [31:0] src_b3;
        input [31:0] src_b2;
        input [31:0] src_b1;
        input [31:0] src_b0;
        begin
            pick_lanes = {src_b3[31:24], src_b2[23:16], src_b1[15:8], src_b0[7:0]};
        end
    endfunction

    // Phase counter: synchronous clear, otherwise wraps 0 -> 1 -> 2 -> 3 -> 0.
    always @(posedge clk) begin
        if (rst)
            phase <= 2'd0;
        else
            phase <= phase + 2'd1;
    end

    // Capture / recombine / output.
    always @(posedge clk) begin
        case (phase)
            2'd0, 2'd1, 2'd2: begin
                hold[phase] <= din;
                dout        <= shuffled[phase];
            end

            2'd3: begin
                // The fourth word of the group is used straight from din; the
                // source order rotates by one position for each output word.
                shuffled[0] <= pick_lanes(hold[0], hold[1], hold[2], din    );
                shuffled[1] <= pick_lanes(hold[1], hold[2], din,     hold[0]);
                shuffled[2] <= pick_lanes(hold[2], din,     hold[0], hold[1]);
                shuffled[3] <= pick_lanes(din,     hold[0], hold[1], hold[2]);
                // Reads the pre-edge shuffled[3], i.e. the last word of the
                // previously recombined group.
                dout        <= shuffled[3];
            end
        endcase
    end

endmodule