Error(13692): VHDL error at generic_multiplier.vhd(66): object cannot be indexed because it has natural type rather than array type

The code is as follows:

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library vvc_lib;
use vvc_lib.pkg_TrCoeffMatrix.all;

entity generic_multiplier is
  generic (
    g_data_width  : natural := 512;  -- 64 bytes (32 (16bit) coefficients)
    g_coeff_width : natural := 16;
    g_bl_size     : natural := 4
  );
  port (
    i_data_row : in  std_ulogic_vector(g_data_width-1 downto 0);  -- (M--> L)SB <= (|Pix_N|...|Pix_2|Pix1|) in memory
    o_data_row : out std_ulogic_vector(2*g_bl_size*g_data_width-1 downto 0)
  );
end entity generic_multiplier;

architecture rtl of generic_multiplier is
  subtype t_dim1 is signed;
  type t_dim1_vector is array (natural range <>) of t_dim1;
  type t_dim2_vector is array (natural range <>) of t_dim1_vector;
  -- type t_dim2_vector is array (0 to 3, 0 to 7) of std_ulogic_vector(7 downto 0);

  signal s_data_in  : t_dim2_vector (0 to g_bl_size-1)(0 to g_bl_size-1)(g_coeff_width-1 downto 0);
  signal s_data_out : signed(2*g_bl_size*g_data_width-1 downto 0) := (others => '0');

begin
  -- Reshape input data
  -- No need to change upto 4x4 block
  gen00: for i in 0 to g_bl_size-1 generate
    gen01: for j in 0 to g_bl_size-1 generate
      s_data_in(i)(j) <= signed(i_data_row(g_coeff_width*(i*g_bl_size+j+1)-1 downto g_coeff_width*(i*g_bl_size+j)));
    end generate gen01;
  end generate gen00;

  -- Matrix Multiplication
  gen02: for i in 0 to g_bl_size-1 generate
    gen03: for j in 0 to g_bl_size-1 generate
      gen04: for k in 0 to g_bl_size-1 generate
        s_data_out(2*g_coeff_width((i+j)*g_bl_size+k+1)-1 downto 2*g_coeff_width((i+j)*g_bl_size+k)) <= s_data_in(i)(k) * c_dct2_b4(k)(j);
      end generate gen04;
    end generate gen03;
  end generate gen02;

  -- Output
  o_data_row(g_bl_size*g_bl_size*g_bl_size*2*g_coeff_width-1 downto 0) <= std_ulogic_vector(s_data_out(g_bl_size*g_bl_size*g_bl_size*2*g_coeff_width-1 downto 0));
  o_data_row(s_data_out'length-1 downto g_bl_size*g_bl_size*g_bl_size*2*g_coeff_width) <= (others => '0');

end architecture rtl;

s_data_out(2*g_coeff_width((i+j)*g_bl_size+k+1)-1 downto 2*g_coeff_width((i+j)*g_bl_size+k)) <= s_data_in(i)(k) * c_dct2_b4(k)(j);
end generate gen04;
