[/QUOTE]
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library vvc_lib;
use vvc_lib.pkg_TrCoeffMatrix.all;
-- Entity generic_multiplier
--
-- Interface of a purely combinational block: there is no clock or reset port,
-- only one packed input vector and one packed output vector.
entity generic_multiplier is
  generic (
    -- Width of the packed input row in bits.
    -- Default 512 bits = 64 bytes = 32 coefficients of 16 bits each.
    g_data_width  : natural := 512;
    -- Width of a single coefficient in bits.
    g_coeff_width : natural := 16;
    -- Block size parameter (default 4).
    g_bl_size     : natural := 4
  );
  port (
    -- Packed input row. Per the original author's note, the vector is laid out
    -- MSB -> LSB as |Pix_N|...|Pix_2|Pix_1|, mirroring the order in memory.
    i_data_row : in  std_ulogic_vector(g_data_width-1 downto 0);
    -- Packed output row, 2*g_bl_size*g_data_width bits wide.
    o_data_row : out std_ulogic_vector(2*g_bl_size*g_data_width-1 downto 0)
  );
end entity generic_multiplier;
-- Architecture rtl of generic_multiplier
--
-- Combinational generation of the element-wise partial products of a
-- g_bl_size x g_bl_size input block with the coefficient matrix c_dct2_b4
-- (taken from vvc_lib.pkg_TrCoeffMatrix).
--
-- Only the individual products s_data_in(i)(k) * c_dct2_b4(k)(j) are formed
-- here; they are NOT summed over k. Each product is placed in its own slot of
-- the output vector:
--
--   slot(i, j, k) = (i*g_bl_size + j)*g_bl_size + k
--
-- Every slot is c_prod_width bits wide, slot 0 sits at the LSB end. Output
-- bits above the last slot are driven to '0'.
--
-- NOTE(review): the coefficient table is hard-wired to c_dct2_b4, so
-- g_bl_size values other than the default presumably require a different
-- table -- confirm against pkg_TrCoeffMatrix.
-- NOTE(review): the slot width assumes the product is exactly 2*g_coeff_width
-- bits wide, i.e. that the elements of c_dct2_b4 are g_coeff_width-bit signed
-- values (or integers) -- confirm against pkg_TrCoeffMatrix.
architecture rtl of generic_multiplier is

  -- Width of one partial product.
  constant c_prod_width : natural := 2*g_coeff_width;
  -- Number of input bits consumed: one coefficient per block element.
  constant c_in_width   : natural := g_bl_size*g_bl_size*g_coeff_width;
  -- Number of output bits carrying products: g_bl_size**3 slots.
  constant c_used_width : natural := g_bl_size*g_bl_size*g_bl_size*c_prod_width;

  -- Nested unconstrained arrays (VHDL-2008): block(row)(column) of signed.
  subtype t_dim1 is signed;
  type t_dim1_vector is array (natural range <>) of t_dim1;
  type t_dim2_vector is array (natural range <>) of t_dim1_vector;

  -- Input block unpacked into individually addressable signed coefficients.
  signal s_data_in  : t_dim2_vector (0 to g_bl_size-1)(0 to g_bl_size-1)(g_coeff_width-1 downto 0);
  -- Packed partial products; same width as o_data_row.
  signal s_data_out : signed(2*g_bl_size*g_data_width-1 downto 0) := (others => '0');

begin

  -- Fail early with a readable message when the generics do not fit the
  -- packed vectors, instead of an obscure out-of-range slice error.
  assert c_in_width <= g_data_width
    report "generic_multiplier: g_bl_size**2 * g_coeff_width exceeds g_data_width"
    severity failure;
  assert c_used_width <= s_data_out'length
    report "generic_multiplier: g_bl_size**3 * 2*g_coeff_width exceeds output width"
    severity failure;

  -- Reshape input data -- no need to change up to a 4x4 block.
  -- Element (i, j) is read from coefficient position i*g_bl_size + j,
  -- counted from the LSB end of i_data_row.
  gen00 : for i in 0 to g_bl_size-1 generate
    gen01 : for j in 0 to g_bl_size-1 generate
      s_data_in(i)(j) <= signed(i_data_row(g_coeff_width*(i*g_bl_size+j+1)-1 downto g_coeff_width*(i*g_bl_size+j)));
    end generate gen01;
  end generate gen00;

  -- Matrix multiplication (partial products only).
  -- The slot index must be unique per (i, j, k): using (i+j) instead of
  -- (i*g_bl_size+j) would let different (i, j) pairs drive the same slice and
  -- resolve to 'X'.
  gen02 : for i in 0 to g_bl_size-1 generate
    gen03 : for j in 0 to g_bl_size-1 generate
      gen04 : for k in 0 to g_bl_size-1 generate
        s_data_out(c_prod_width*((i*g_bl_size+j)*g_bl_size+k+1)-1 downto c_prod_width*((i*g_bl_size+j)*g_bl_size+k))
          <= s_data_in(i)(k) * c_dct2_b4(k)(j);
      end generate gen04;
    end generate gen03;
  end generate gen02;

  -- Output: forward the product slots, zero the unused upper bits.
  o_data_row(c_used_width-1 downto 0)                <= std_ulogic_vector(s_data_out(c_used_width-1 downto 0));
  o_data_row(s_data_out'length-1 downto c_used_width) <= (others => '0');

end architecture rtl;
[QUOTE]