Commit 23fb8730 authored by Adrian Byszuk

Rework handshaking in TLP RX path. Fixes transmission errors under moderate and heavy loads

parent 83c1b882
......@@ -48,8 +48,8 @@ entity RxIn_Delay is
m_axis_rx_tbar_hit : in std_logic_vector(C_BAR_NUMBER-1 downto 0);
m_axis_rx_tready : out std_logic;
ddr_s2mm_cmd_tready : in std_logic;
ddr_s2mm_tready : in std_logic;
wb_FIFO_full : in std_logic;
cpld_ready : in std_logic;
mwr_ready : in std_logic;
-- Delay for one clock
m_axis_rx_tlast_dly : out std_logic;
......@@ -559,13 +559,13 @@ begin
TLP_is_Cpl <= '0';
TLP_is_CplLk <= '0';
else
if trn_rsof_n = '0' then
-- MRd
if m_axis_rx_tdata(C_TLP_FMT_BIT_TOP downto C_TLP_FMT_BIT_BOT) = C_FMT3_NO_DATA
and m_axis_rx_tdata(C_TLP_TYPE_BIT_TOP downto C_TLP_TYPE_BIT_BOT) = C_TYPE_MEM_REQ
and m_axis_rx_tdata(C_TLP_EP_BIT) = '0'
and m_axis_rx_tbar_hit(CINT_BAR_SPACES-1 downto 0) /= C_ALL_ZEROS(CINT_BAR_SPACES-1 downto 0)
and m_axis_rx_tvalid = '1'
and trn_rsof_n = '0'
then
TLP_is_MRd_H3DW <= '1';
else
......@@ -577,7 +577,6 @@ begin
and m_axis_rx_tdata(C_TLP_EP_BIT) = '0'
and m_axis_rx_tbar_hit(CINT_BAR_SPACES-1 downto 0) /= C_ALL_ZEROS(CINT_BAR_SPACES-1 downto 0)
and m_axis_rx_tvalid = '1'
and trn_rsof_n = '0'
then
TLP_is_MRd_H4DW <= '1';
else
......@@ -590,7 +589,6 @@ begin
and m_axis_rx_tdata(C_TLP_EP_BIT) = '0'
and m_axis_rx_tbar_hit(CINT_BAR_SPACES-1 downto 0) /= C_ALL_ZEROS(CINT_BAR_SPACES-1 downto 0)
and m_axis_rx_tvalid = '1'
and trn_rsof_n = '0'
then
TLP_is_MRdLk_H3DW <= '1';
else
......@@ -602,7 +600,6 @@ begin
and m_axis_rx_tdata(C_TLP_EP_BIT) = '0'
and m_axis_rx_tbar_hit(CINT_BAR_SPACES-1 downto 0) /= C_ALL_ZEROS(CINT_BAR_SPACES-1 downto 0)
and m_axis_rx_tvalid = '1'
and trn_rsof_n = '0'
then
TLP_is_MRdLk_H4DW <= '1';
else
......@@ -615,7 +612,6 @@ begin
and m_axis_rx_tdata(C_TLP_EP_BIT) = '0'
and m_axis_rx_tbar_hit(CINT_BAR_SPACES-1 downto 0) /= C_ALL_ZEROS(CINT_BAR_SPACES-1 downto 0)
and m_axis_rx_tvalid = '1'
and trn_rsof_n = '0'
then
TLP_is_MWr_H3DW <= '1';
else
......@@ -627,7 +623,6 @@ begin
and m_axis_rx_tdata(C_TLP_EP_BIT) = '0'
and m_axis_rx_tbar_hit(CINT_BAR_SPACES-1 downto 0) /= C_ALL_ZEROS(CINT_BAR_SPACES-1 downto 0)
and m_axis_rx_tvalid = '1'
and trn_rsof_n = '0'
then
TLP_is_MWr_H4DW <= '1';
else
......@@ -639,7 +634,6 @@ begin
and m_axis_rx_tdata(C_TLP_TYPE_BIT_TOP downto C_TLP_TYPE_BIT_BOT) = C_TYPE_COMPLETION
and m_axis_rx_tdata(C_TLP_EP_BIT) = '0'
and m_axis_rx_tvalid = '1'
and trn_rsof_n = '0'
then
TLP_is_CplD <= '1';
else
......@@ -650,7 +644,6 @@ begin
and m_axis_rx_tdata(C_TLP_TYPE_BIT_TOP downto C_TLP_TYPE_BIT_BOT) = C_TYPE_COMPLETION_LK
and m_axis_rx_tdata(C_TLP_EP_BIT) = '0'
and m_axis_rx_tvalid = '1'
and trn_rsof_n = '0'
then
TLP_is_CplDLk <= '1';
else
......@@ -661,7 +654,6 @@ begin
and m_axis_rx_tdata(C_TLP_TYPE_BIT_TOP downto C_TLP_TYPE_BIT_BOT) = C_TYPE_COMPLETION
and m_axis_rx_tdata(C_TLP_EP_BIT) = '0'
and m_axis_rx_tvalid = '1'
and trn_rsof_n = '0'
then
TLP_is_Cpl <= '1';
else
......@@ -672,7 +664,6 @@ begin
and m_axis_rx_tdata(C_TLP_TYPE_BIT_TOP downto C_TLP_TYPE_BIT_BOT) = C_TYPE_COMPLETION_LK
and m_axis_rx_tdata(C_TLP_EP_BIT) = '0'
and m_axis_rx_tvalid = '1'
and trn_rsof_n = '0'
then
TLP_is_CplLk <= '1';
else
......@@ -680,6 +671,7 @@ begin
end if;
end if;
end if;
end if;
end process;
-- --------------------------------------------------------------------------
......@@ -753,7 +745,7 @@ begin
end if;
when TK_MWr_3Hdr_C =>
m_axis_rx_tready_i <= '1';
m_axis_rx_tready_i <= mwr_ready;
if m_axis_rx_tlast = '1' and m_axis_rx_tlast_r1 = '0' -- raising edge
and m_axis_rx_tready_i = '1' then
FSM_TLP_Cnt <= TK_Idle;
......@@ -764,7 +756,7 @@ begin
end if;
when TK_MWr_4Hdr_C =>
m_axis_rx_tready_i <= '1';
m_axis_rx_tready_i <= mwr_ready;
if m_axis_rx_tlast = '1' and m_axis_rx_tlast_r1 = '0' -- raising edge
and m_axis_rx_tready_i = '1' then
FSM_TLP_Cnt <= TK_Idle;
......@@ -775,7 +767,7 @@ begin
end if;
when TK_Cpld_Hdr_C =>
m_axis_rx_tready_i <= '1';
m_axis_rx_tready_i <= cpld_ready;
if m_axis_rx_tlast = '1' and m_axis_rx_tlast_r1 = '0' -- raising edge
and m_axis_rx_tready_i = '1' then
FSM_TLP_Cnt <= TK_Idle;
......@@ -786,16 +778,13 @@ begin
end if;
when TK_Body =>
m_axis_rx_tready_i <= ((TLP_is_MWr_H4DW or TLP_is_MWr_H3DW) and mwr_ready) or (TLP_is_CplD and cpld_ready);
--for TLP body we can't wait for rising edge because there is a chance that TLP EOF
--will hit when *_tready_i = 0 which will cause deadlock
if m_axis_rx_tlast = '1' and m_axis_rx_tvalid = '1' and m_axis_rx_tready_i = '1' then
FSM_TLP_Cnt <= TK_Idle;
m_axis_rx_tready_i <= not(((MWr_on_Pool or CplD_on_Pool_i) and not(ddr_s2mm_tready))
or ((MWr_on_EB or CplD_on_EB_i) and wb_fifo_full));
else
FSM_TLP_Cnt <= TK_Body;
m_axis_rx_tready_i <= not(((MWr_on_Pool or CplD_on_Pool_i) and not(ddr_s2mm_tready))
or ((MWr_on_EB or CplD_on_EB_i) and wb_fifo_full));
end if;
when others =>
......
......@@ -44,6 +44,7 @@ entity rx_CplD_Transact is
m_axis_rx_tbar_hit : in std_logic_vector(C_BAR_NUMBER-1 downto 0);
CplD_Type : in std_logic_vector(3 downto 0);
cpld_ready : out std_logic;
Req_ID_Match : in std_logic;
usDex_Tag_Matched : in std_logic;
......@@ -80,6 +81,7 @@ entity rx_CplD_Transact is
wb_FIFO_wsof : out std_logic;
wb_FIFO_weof : out std_logic;
wb_FIFO_din : out std_logic_vector(C_DBUS_WIDTH-1 downto 0);
wb_FIFO_full : in std_logic;
-- Registers Write Port
Regs_WrEn : out std_logic;
......@@ -175,17 +177,15 @@ architecture Behavioral of rx_CplD_Transact is
signal m_axis_rx_tdata_Little_r3 : std_logic_vector (C_DBUS_WIDTH-1 downto 0);
signal m_axis_rx_tdata_Little_r4 : std_logic_vector (C_DBUS_WIDTH-1 downto 0);
-- signal m_axis_rx_tbar_hit_i : std_logic_vector(C_BAR_NUMBER-1 downto 0);
signal trn_rsof_n_i : std_logic;
signal in_packet_reg : std_logic;
signal cpld_ready_i : std_logic;
signal m_axis_rx_tlast_i : std_logic;
signal m_axis_rx_tlast_r1 : std_logic;
signal m_axis_rx_tlast_r2 : std_logic;
signal m_axis_rx_tlast_r3 : std_logic;
signal m_axis_rx_tlast_r4 : std_logic;
-- signal Tlp_has_4KB_r1 : std_logic;
signal m_axis_rx_tkeep_i : std_logic_vector(C_DBUS_WIDTH/8-1 downto 0);
signal m_axis_rx_tkeep_r1 : std_logic_vector(C_DBUS_WIDTH/8-1 downto 0);
signal m_axis_rx_tkeep_r2 : std_logic_vector(C_DBUS_WIDTH/8-1 downto 0);
......@@ -207,7 +207,6 @@ architecture Behavioral of rx_CplD_Transact is
signal ddr_s2mm_cmd_saddr : std_logic_vector(31 downto 0);
signal ddr_s2mm_tvalid_i : STD_LOGIC;
signal ddr_s2mm_tlast_i : STD_LOGIC;
signal ddr_s2mm_tready_r : STD_LOGIC;
-- Event Buffer write port
signal wb_FIFO_we_i : std_logic;
......@@ -316,15 +315,17 @@ architecture Behavioral of rx_CplD_Transact is
--signals of elastic buffer used to accommodate handshaking delays between DDR/WB "ready" signals
--and PCIe core data pipeline
signal elbuf_din : std_logic_vector(C_ELBUF_WIDTH-1 downto 0);
signal elbuf_din : std_logic_vector(C_ELBUF_WIDTH-1 downto 0) := (others => '0');
signal elbuf_we : std_logic;
signal elbuf_dout, elbuf_dout_r : std_logic_vector(C_ELBUF_WIDTH-1 downto 0);
signal elbuf_dout: std_logic_vector(C_ELBUF_WIDTH-1 downto 0);
signal elbuf_re, elbuf_re_r, elbuf_re_st : std_logic;
signal elbuf_empty, elbuf_empty_r : std_logic;
signal elbuf_empty, elbuf_empty_r, elbuf_afull : std_logic;
begin
-- Event Buffer write
cpld_ready <= cpld_ready_i;
-- Wishbone fifo write
wb_FIFO_we <= wb_FIFO_we_i;
wb_FIFO_wsof <= wb_FIFO_wsof_i;
wb_FIFO_weof <= wb_FIFO_weof_i;
......@@ -1141,8 +1142,9 @@ begin
g_with_empty => true,
g_with_full => false,
g_with_almost_empty => false,
g_with_almost_full => false,
g_with_almost_full => true,
g_with_count => false,
g_almost_full_threshold => 26,
g_with_fifo_inferred => true)
port map(
rst_n_i => user_reset_n,
......@@ -1154,7 +1156,7 @@ begin
empty_o => elbuf_empty,
full_o => open,
almost_empty_o => open,
almost_full_o => open,
almost_full_o => elbuf_afull,
count_o => open
);
......@@ -1332,6 +1334,7 @@ begin
elbuf_we <= '0';
else
elbuf_we <= '0';
elbuf_din <= (others => '0');
case RxCplDTrn_State_r1 is
......@@ -1460,7 +1463,6 @@ begin
ddr_s2mm_cmd_tvalid_i <= '0';
ddr_s2mm_tvalid_i <= '0';
elbuf_re_st <= '1';
elbuf_dout_r <= (others => '0');
if elbuf_empty = '0' then
ddr_wr_state <= st_cmd;
elbuf_re_st <= '0';
......@@ -1477,6 +1479,7 @@ begin
--check if this is really a header; something went horribly wrong if it isn't
if elbuf_dout(C_IS_HDR_BIT) = '0' then
ddr_wr_state <= st_idle;
elbuf_re_st <= '1';
elsif elbuf_dout(C_IS_HDR_BIT) = '1' and elbuf_dout(C_DDR_HIT_BIT) = '1' and ddr_s2mm_cmd_tready = '1' then
ddr_wr_state <= st_data;
elbuf_re_st <= '1';
......@@ -1488,7 +1491,7 @@ begin
ddr_s2mm_tkeep <= elbuf_dout(C_TKEEP_BTOP downto C_TKEEP_BBOT);
ddr_s2mm_tvalid_i <= not(elbuf_empty_r);
--stop reading if we are at the end of packet
elbuf_re_st <= not(elbuf_empty) and not(elbuf_dout_r(C_TLAST_BIT)) and not(elbuf_dout(C_TLAST_BIT));
elbuf_re_st <= not(elbuf_empty) and not(elbuf_dout(C_TLAST_BIT));
elbuf_empty_r <= elbuf_empty; --have to register only at data phase, otherwise tvalid will come too fast
if (elbuf_empty = '0' and elbuf_re = '1') or elbuf_dout(C_TLAST_BIT) = '1' then
--if it's the last word in a packet, the fifo will already be empty, so push the last word unconditionally
......@@ -1509,13 +1512,14 @@ begin
process(user_clk)
begin
if rising_edge(user_clk) then
ddr_s2mm_tready_r <= ddr_s2mm_tready;
elbuf_re_r <= elbuf_re;
end if;
end process;
--stop reading *in the same clock cycle* that receiver goes out-of-ready
--or it's last word in packet. Otherwise we'll lose one word, usually a header
elbuf_re <= (elbuf_re_st and ddr_s2mm_cmd_tready) when ddr_wr_state = st_idle else
(elbuf_re_st and ddr_s2mm_tready);
(elbuf_re_st and ddr_s2mm_tready and not(elbuf_dout(C_TLAST_BIT)));
concat_rd <= m_axis_rx_tdata_r3(31 downto 0) & m_axis_rx_tdata_r4(63 downto 32);
......@@ -1626,6 +1630,19 @@ begin
end if;
end if;
end process;
-- Registers the CplD ready flag (cpld_ready_i) on every rising edge of user_clk.
-- The value is selected in priority order:
--   * DDR_space_hit = '1'  : ready while the elastic buffer is not almost full (elbuf_afull = '0')
--   * FIFO_space_hit = '1' : ready while wb_fifo_full = '0'
--   * otherwise            : always ready
-- Because the flag is registered, it follows its inputs with one clock of latency.
-- This process has no reset branch.
process(user_clk)
begin
if rising_edge(user_clk) then
if DDR_space_hit = '1' then
-- target is DDR space: back-pressure comes from the elastic buffer almost-full flag
cpld_ready_i <= not(elbuf_afull);
elsif FIFO_space_hit = '1' then
-- target is FIFO space: back-pressure comes from the wb_fifo_full input
cpld_ready_i <= not(wb_fifo_full);
else
-- neither space is hit: nothing to throttle on
cpld_ready_i <= '1';
end if;
end if;
end process;
-- ---------------------------------
-- Regenerate trn_rsof_n signal as in old TRN core
--
......
......@@ -44,6 +44,7 @@ entity rx_MRd_Transact is
m_axis_rx_tkeep : in std_logic_vector(C_DBUS_WIDTH/8-1 downto 0);
m_axis_rx_terrfwd : in std_logic;
m_axis_rx_tvalid : in std_logic;
m_axis_rx_tready : in std_logic;
-- m_axis_rx_tready : OUT std_logic;
rx_np_ok : out std_logic;
rx_np_req : out std_logic;
......@@ -165,7 +166,7 @@ begin
rx_np_req_i <= rx_np_ok_i;
-- ( m_axis_rx_tvalid seems never deasserted during packet)
trn_rx_throttle <= not m_axis_rx_tvalid; -- or m_axis_rx_tready_i;
trn_rx_throttle <= not(m_axis_rx_tvalid) or not(m_axis_rx_tready);
-- ------------------------------------------------
-- Synchronous Delay: m_axis_rx_tdata + m_axis_rx_tbar_hit
......@@ -211,7 +212,7 @@ begin
when ST_MRd_IDLE =>
if rx_np_ok_i = '1' then
if rx_np_ok_i = '1' and trn_rx_throttle = '0' then
case MRd_Type is
......@@ -241,7 +242,7 @@ begin
when ST_MRd_Tail => -- support back-to-back transactions
if rx_np_ok_i = '1' then
if rx_np_ok_i = '1' and trn_rx_throttle = '0' then
case MRd_Type is
......
This diff is collapsed.
......@@ -278,6 +278,8 @@ architecture Behavioral of rx_Transact is
signal tRAM_addrB : std_logic_vector(C_TAGRAM_AWIDTH-1 downto 0);
signal tRAM_dinB : std_logic_vector(C_TAGRAM_DWIDTH-1 downto 0);
--transmission ready signals from CplD, MWr
signal cpld_ready, mwr_ready : std_logic;
begin
......@@ -319,8 +321,8 @@ begin
m_axis_rx_tbar_hit => m_axis_rx_tbar_hit , -- IN std_logic_vector(C_BAR_NUMBER-1 downto 0);
m_axis_rx_tready => m_axis_rx_tready , -- OUT std_logic;
ddr_s2mm_cmd_tready => ddr_s2mm_cmd_tready,
ddr_s2mm_tready => ddr_s2mm_tready,
wb_FIFO_full => wb_FIFO_full , -- IN std_logic;
cpld_ready => cpld_ready,
mwr_ready => mwr_ready,
-- Delayed
m_axis_rx_tlast_dly => m_axis_rx_tlast_dly , -- OUT std_logic;
......@@ -372,6 +374,7 @@ begin
m_axis_rx_tkeep => m_axis_rx_tkeep_dly, -- IN std_logic_vector(C_DBUS_WIDTH/8-1 downto 0);
m_axis_rx_terrfwd => m_axis_rx_terrfwd_dly, -- IN std_logic;
m_axis_rx_tvalid => m_axis_rx_tvalid_dly, -- IN std_logic;
m_axis_rx_tready => m_axis_rx_tready_dly,
m_axis_rx_tbar_hit => m_axis_rx_tbar_hit_dly, -- IN std_logic_vector(6 downto 0);
-- m_axis_rx_tready => open, -- m_axis_rx_tready_MRd, -- OUT std_logic;
rx_np_ok => rx_np_ok, -- OUT std_logic;
......@@ -415,12 +418,14 @@ begin
MWr_Type => MWr_Type , -- IN std_logic_vector(1 downto 0);
Tlp_has_4KB => Tlp_has_4KB , -- IN std_logic;
mwr_ready => mwr_ready,
-- Event Buffer write port
wb_FIFO_we => wb_FIFO_we_MWr , -- OUT std_logic;
wb_FIFO_wsof => wb_FIFO_wsof_MWr , -- OUT std_logic;
wb_FIFO_weof => wb_FIFO_weof_MWr , -- OUT std_logic;
wb_FIFO_din => wb_FIFO_din_MWr , -- OUT std_logic_vector(C_DBUS_WIDTH-1 downto 0);
wb_fifo_full => wb_fifo_full,
-- To registers module
Regs_WrEn => Regs_WrEn0 , -- OUT std_logic;
......@@ -458,6 +463,7 @@ begin
m_axis_rx_tready => m_axis_rx_tready_dly, -- IN std_logic;
m_axis_rx_tbar_hit => m_axis_rx_tbar_hit_dly, -- IN std_logic_vector(6 downto 0);
cpld_ready => cpld_ready,
CplD_Type => CplD_Type, -- IN std_logic_vector(3 downto 0);
Req_ID_Match => Req_ID_Match, -- IN std_logic;
......@@ -495,6 +501,7 @@ begin
wb_FIFO_wsof => wb_FIFO_wsof_CplD , -- OUT std_logic;
wb_FIFO_weof => wb_FIFO_weof_CplD , -- OUT std_logic;
wb_FIFO_din => wb_FIFO_din_CplD , -- OUT std_logic_vector(C_DBUS_WIDTH-1 downto 0);
wb_fifo_full => wb_fifo_full,
-- To registers module
Regs_WrEn => Regs_WrEn1, -- OUT std_logic;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment