Commit 2c71597a authored by Wesley W. Terpstra

altera gxb: remove timequest work-around and remove global routing of clk_tx

parent d931f4b2
......@@ -33,6 +33,9 @@ package wr_altera_pkg is
end component;
component wr_gxb_phy_arriaii
generic (
g_tx_latch_edge : std_logic := '1';
g_rx_latch_edge : std_logic := '0');
port (
clk_reconf_i : in std_logic;
clk_pll_i : in std_logic;
......@@ -52,8 +55,7 @@ package wr_altera_pkg is
rx_enc_err_o : out std_logic;
rx_bitslide_o : out std_logic_vector(3 downto 0);
pad_txp_o : out std_logic;
pad_rxp_i : in std_logic := '0';
dbg_tx_clk_o : out std_logic);
pad_rxp_i : in std_logic := '0');
end component;
end wr_altera_pkg;
......@@ -44,10 +44,10 @@
-- Transceiver Clocking in Arria II Devices <http://www.altera.com/literature/hb/arria-ii-gx/aiigx_52002.pdf>
-- Reset Control and Power Down in Arria II Devices <http://www.altera.com/literature/hb/arria-ii-gx/aiigx_52004.pdf>
-- Recommended Design Practices (Clock Gating) <http://www.altera.com/literature/hb/qts/qts_qii51006.pdf>
-- Achieving Timing Closure in Basic (PMA Direct) Functional Mode
-- <http://www.altera.com/literature/an/an580.pdf>
-- AN 610: Implementing Deterministic Latency for CPRI and OBSAI Protocols in Altera Devices
-- <http://www.altera.com/literature/an/an610.pdf>
-- Achieving Timing Closure in Basic (PMA Direct) Functional Mode
-- <http://www.altera.com/literature/an/an580.pdf>
library ieee;
use ieee.std_logic_1164.all;
......@@ -58,7 +58,9 @@ use work.gencores_pkg.all;
use work.disparity_gen_pkg.all;
entity wr_gxb_phy_arriaii is
generic (
g_tx_latch_edge : std_logic := '1';
g_rx_latch_edge : std_logic := '0');
port (
clk_reconf_i : in std_logic; -- 50 MHz
clk_pll_i : in std_logic; -- feeds transmitter PLL
......@@ -82,9 +84,7 @@ entity wr_gxb_phy_arriaii is
rx_bitslide_o : out std_logic_vector(3 downto 0); -- RX bitslide indication, indicating the delay of the RX path of the transceiver (in UIs). Must be valid when rx_data_o is valid.
pad_txp_o : out std_logic;
pad_rxp_i : in std_logic := '0';
dbg_tx_clk_o : out std_logic); -- do not use for anything other than an output on an oscilloscope
pad_rxp_i : in std_logic := '0');
end wr_gxb_phy_arriaii;
......@@ -155,8 +155,8 @@ architecture rtl of wr_gxb_phy_arriaii is
out_10b_o : out std_logic_vector(9 downto 0));
end component;
signal clk_rx : std_logic; -- local clock
signal clk_rx_glbl : std_logic; -- global clock
signal clk_rx_gxb : std_logic; -- pre clkctrl
signal clk_rx : std_logic; -- global clock
signal clk_tx : std_logic; -- local clock
signal pll_locked : std_logic;
signal rx_freqlocked : std_logic;
......@@ -187,31 +187,22 @@ architecture rtl of wr_gxb_phy_arriaii is
signal tx_disp_pipe : std_logic_vector (2 downto 0);
signal rx_bitslipboundaryselectout : std_logic_vector (4 downto 0);
signal rx_gxb_dataout : std_logic_vector (9 downto 0); -- signal out of GXB
signal rx_glbl_dataout : std_logic_vector (9 downto 0); -- globally clocked register
signal rx_gxb_syncstatus : std_logic;
signal rx_lcln_syncstatus : std_logic;
signal rx_lclp_syncstatus : std_logic;
signal rx_glbl_syncstatus : std_logic;
signal rx_gxb_dataout : std_logic_vector (9 downto 0); -- signal
signal rx_lcln_dataout : std_logic_vector (9 downto 0); -- local neg-edged register
signal rx_lclp_dataout : std_logic_vector (9 downto 0); -- local pos-edged register
signal rx_glbl_dataout : std_logic_vector (9 downto 0); -- global register (+1 inside decoder)
signal tx_enc_datain : std_logic_vector (9 downto 0); -- signal copy of register in encoder
signal tx_glbl_datain : std_logic_vector (9 downto 0); -- global register
signal tx_lcln_datain : std_logic_vector (9 downto 0); -- local neg-edged register
signal tx_lclp_datain : std_logic_vector (9 downto 0); -- local pos-edged register
signal tx_gxb_datain : std_logic_vector (9 downto 0); -- signal
signal tx_enc_datain : std_logic_vector (9 downto 0); -- registered encoder output (clk_pll_i)
signal tx_gxb_datain : std_logic_vector (9 downto 0); -- clock transfer register (clk_tx)
begin
rx_rbclk_o <= clk_rx_glbl;
dbg_tx_clk_o <= clk_tx; -- NOT FOR USE WITH tx_* signals
rx_rbclk_o <= clk_rx;
U_RxClkout : rxclkout
port map (
inclk => clk_rx,
outclk => clk_rx_glbl);
inclk => clk_rx_gxb,
outclk => clk_rx);
-- Altera PHY calibration block
U_Reconf : altgx_reconf
......@@ -230,7 +221,7 @@ begin
-- Derived clocks used for tx/rx lines
tx_clkout(0) => clk_tx,
pll_locked(0) => pll_locked,
rx_clkout(0) => clk_rx,
rx_clkout(0) => clk_rx_gxb,
rx_freqlocked(0) => rx_freqlocked,
rx_pll_locked(0) => open,
-- Calibration control of the GXB
......@@ -270,7 +261,7 @@ begin
-- Decode the RX data
decoder : dec_8b10b
port map(
clk_i => clk_rx_glbl,
clk_i => clk_rx,
rst_n_i => rx_8b10b_rstn(0),
in_10b_i => rx_glbl_dataout,
ctrl_o => rx_k_o,
......@@ -392,17 +383,17 @@ begin
-- Generate reset for the 8b10b decoder and ep_sync_detect
-- should use global version of clk_rx
p_rx_reset : process(clk_rx_glbl) is
p_rx_reset : process(clk_rx) is
begin
if rising_edge(clk_rx_glbl) then
if rising_edge(clk_rx) then
rx_8b10b_rstn <= (not rx_digitalreset) & rx_8b10b_rstn(rx_8b10b_rstn'left downto 1);
end if;
end process;
-- Dump the link if the bitslide changes
p_dump_link : process(clk_rx_glbl) is
p_dump_link : process(clk_rx) is
begin
if rising_edge(clk_rx_glbl) then
if rising_edge(clk_rx) then
if rx_glbl_syncstatus = '1' then
rx_dump_link <= (others => '1');
else
......@@ -411,15 +402,6 @@ begin
end if;
end process;
-- A slow signal that doesn't traverse the full RX sync path
-- should use a global version of clk_rx
p_rx_bitslide : process(clk_rx_glbl) is
begin
if rising_edge(clk_rx_glbl) then
rx_bitslide_o <= rx_bitslipboundaryselectout(3 downto 0);
end if;
end process;
-- The disparity should be delayed for WR
tx_disparity_o <= tx_disp_pipe(2);
p_delay_disp : process(clk_pll_i)
......@@ -430,68 +412,34 @@ begin
end if;
end process;
-- The extra registers are to allow signals enough time to reach the GXB
-- clk_pll_i may be a global clock
p_tx_path_pll : process(clk_pll_i) is
begin
if rising_edge(clk_pll_i) then
tx_glbl_datain <= tx_enc_datain;
-- tx_enc_datain is a registered output of enc_8b10b
-- Two back-to-back global registers should be enough to cross FPGA
end if;
end process;
-- Cross clock domain from pll_clk_i to tx_clk
-- Because they are async registers according to TimeQuest, they get placed
-- side-by-side like a synchronizer. However, they are actually in phase.
-- Thus we would get a hold violation unless we flip the clock edge.
-- tx_lcln_datain should use a local clock
p_tx_path_neg : process(clk_tx) is
begin
if falling_edge(clk_tx) then
tx_lcln_datain <= tx_glbl_datain;
end if;
end process;
-- tx_lclp_datain should use a local clock
p_tx_path_pos : process(clk_tx) is
-- These clocks are in phase copies of each other.
-- Ensure that clk_tx has GLOBAL_SIGNAL OFF
-- set_instance_assignment -name GLOBAL_SIGNAL OFF \
-- -from wr_gxb_phy_arriaii:wr_gxb_phy_arriaii_1|arria_phy:U_The_PHY|arria_phy_alt4gxb:arria_phy_alt4gxb_component|tx_clkout_int_wire[0] \
-- -to wr_gxb_phy_arriaii:wr_gxb_phy_arriaii_1|tx_gxb_datain[*]
-- Clock-transfer register: moves the registered encoder output (tx_enc_datain)
-- into tx_gxb_datain, clocked by clk_tx.
p_tx_path : process(clk_tx) is
begin
-- NOTE(review): the rising_edge / tx_lclp_datain pair below looks like the
-- pre-change form of this process (diff markers are missing in this view) -- confirm.
if rising_edge(clk_tx) then
tx_lclp_datain <= tx_lcln_datain;
-- The capturing edge is chosen by the generic: g_tx_latch_edge = '1' latches on
-- the rising edge of clk_tx, '0' on the falling edge (the declared default is '1').
if clk_tx'event and clk_tx = g_tx_latch_edge then
tx_gxb_datain <= tx_enc_datain;
end if;
end process;
tx_gxb_datain <= tx_lclp_datain;
-- Use the negative edge to improve insertion timing from GXB
-- (the clock line from the GXB is slower than the data)
-- these register should use a local clock
p_rx_path_neg : process(clk_rx) is
-- Additional register to improve timings
-- Registers the raw GXB receive data and sync status on clk_rx before they are
-- consumed as rx_glbl_dataout / rx_glbl_syncstatus.
p_rx_path : process(clk_rx) is
begin
-- NOTE(review): the falling_edge / rx_lcln_* lines below look like the pre-change
-- form of this process (diff markers are missing in this view) -- confirm.
if falling_edge(clk_rx) then
rx_lcln_dataout <= rx_gxb_dataout;
rx_lcln_syncstatus <= rx_gxb_syncstatus;
-- The capturing edge is chosen by the generic: g_rx_latch_edge = '1' latches on
-- the rising edge of clk_rx, '0' on the falling edge (the declared default is '0').
if clk_rx'event and clk_rx = g_rx_latch_edge then
rx_glbl_dataout <= rx_gxb_dataout;
rx_glbl_syncstatus <= rx_gxb_syncstatus;
end if;
end process;
-- these should use the local clock
p_rx_path_pos : process(clk_rx) is
-- Slow registered signals out of the GXB
p_rx_regs : process(clk_rx) is
begin
if rising_edge(clk_rx) then
rx_lclp_dataout <= rx_lcln_dataout;
rx_lclp_syncstatus <= rx_lcln_syncstatus;
rx_bitslide_o <= rx_bitslipboundaryselectout(3 downto 0);
end if;
end process;
-- these should use the global clock
p_rx_path_gbl : process(clk_rx_glbl) is
begin
if rising_edge(clk_rx_glbl) then
rx_glbl_dataout <= rx_lclp_dataout;
rx_glbl_syncstatus <= rx_lclp_syncstatus;
-- There is another register of rx_dataout inside dec_8b10b
-- Two back-to-back global registers should be enough to cross FPGA
end if;
end process;
end rtl;
......@@ -324,6 +324,11 @@ set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to slrdn_o
set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to slwrn_o
set_instance_assignment -name FAST_OUTPUT_REGISTER ON -to slrdn_o
set_instance_assignment -name FAST_OUTPUT_REGISTER ON -to slwrn_o
set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC OFF
set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_RETIMING OFF
set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_DUPLICATION OFF
set_instance_assignment -name GLOBAL_SIGNAL OFF -from "wr_gxb_phy_arriaii:wr_gxb_phy_arriaii_1|arria_phy:U_The_PHY|arria_phy_alt4gxb:arria_phy_alt4gxb_component|tx_clkout_int_wire[0]" -to "wr_gxb_phy_arriaii:wr_gxb_phy_arriaii_1|tx_gxb_datain[*]"
set_global_assignment -name PHYSICAL_SYNTHESIS_EFFORT FAST
set_instance_assignment -name PARTITION_HIERARCHY root_partition -to | -section_id Top
set_global_assignment -name VHDL_FILE ../../../top/gsi_exploder/wr_core_demo/exploder_top.vhd
set_global_assignment -name VHDL_FILE ../../../modules/fabric/wr_fabric_pkg.vhd
......
......@@ -723,6 +723,11 @@ set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
set_global_assignment -name SYNCHRONIZER_IDENTIFICATION "FORCED IF ASYNCHRONOUS"
set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON
set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC OFF
set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_RETIMING OFF
set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_DUPLICATION OFF
set_instance_assignment -name GLOBAL_SIGNAL OFF -from "wr_gxb_phy_arriaii:wr_gxb_phy_arriaii_1|arria_phy:U_The_PHY|arria_phy_alt4gxb:arria_phy_alt4gxb_component|tx_clkout_int_wire[0]" -to "wr_gxb_phy_arriaii:wr_gxb_phy_arriaii_1|tx_gxb_datain[*]"
set_global_assignment -name PHYSICAL_SYNTHESIS_EFFORT FAST
set_instance_assignment -name PARTITION_HIERARCHY root_partition -to | -section_id Top
set_global_assignment -name VHDL_FILE ../../../top/gsi_scu/wr_core_demo/scu_top.vhd
set_global_assignment -name VHDL_FILE ../../../modules/fabric/wr_fabric_pkg.vhd
......
derive_pll_clocks -create_base_clocks
create_clock -period 125Mhz -name sfp_ref_clk_i [get_ports {sfp_ref_clk_i}]
derive_clock_uncertainty
# Cut the clock domains from each other
# Note: ref_inst|* and wr_gxb*|transmit_pcs0 are actually the same clock
# However, we cut them to enforce a synchronizer between them in the GXB.
# For the same reason, we cut the RX pcs0 and pma0 clocks.
set_clock_groups -asynchronous \
-group { altera_reserved_tck } \
-group { clk_20m_vcxo_i dmtd_inst|* } \
-group { clk_125m_local_i sys_inst|* } \
-group { clk_125m_pllref_i ref_inst|* } \
-group { wr_gxb*|tx_pll0|* \
wr_gxb*|ch_clk_div0|* \
wr_gxb*|transmit_pma0|* \
wr_gxb*|transmit_pcs0|* } \
-group { sfp_ref_clk_i \
wr_gxb*|rx_cdr_pll0|* \
wr_gxb*|receive_pma0|* } \
-group { wr_gxb*|receive_pcs0|* }
# Timing constraints: declare the SFP reference clock, derive the PLL clocks,
# then declare which clock domains are unrelated to each other.
create_clock -period 125Mhz -name sfp_ref_clk_i [get_ports {sfp_ref_clk_i}]
derive_pll_clocks -create_base_clocks
derive_clock_uncertainty
# Cut the clock domains from each other
# Each -group is cut from every other -group; clocks listed inside the same
# group remain timed against each other. The GXB transmit clocks share a group
# with clk_125m_pllref_i / ref_inst|*, and receive_pcs0 shares a group with the
# RX CDR / receive_pma0 clocks, so transfers between them are analysed.
set_clock_groups -asynchronous \
-group { altera_reserved_tck } \
-group { clk_20m_vcxo_i dmtd_inst|* } \
-group { clk_125m_local_i sys_inst|* } \
-group { clk_125m_pllref_i ref_inst|* \
wr_gxb*|tx_pll0|* \
wr_gxb*|ch_clk_div0|* \
wr_gxb*|transmit_pma0|* \
wr_gxb*|transmit_pcs0|* } \
-group { sfp_ref_clk_i \
wr_gxb*|rx_cdr_pll0|* \
wr_gxb*|receive_pma0|* \
wr_gxb*|receive_pcs0|* }
......@@ -380,7 +380,6 @@ architecture rtl of exploder_top is
signal phy_rx_bitslide : std_logic_vector(3 downto 0);
signal phy_rst : std_logic;
signal phy_loopen : std_logic;
signal dbg_tx_clk : std_logic;
signal wrc_master_i : t_wishbone_master_in;
signal wrc_master_o : t_wishbone_master_out;
......@@ -414,8 +413,9 @@ architecture rtl of exploder_top is
signal eca_lvds_ecl : std_logic_vector(15 downto 0);
signal eca_trigger : std_logic_vector(15 downto 0);
signal lemo_ttl : std_logic;
signal lemo_i : std_logic_vector(8 downto 1);
signal lemo_ttl : std_logic;
signal lemo_i : std_logic_vector(8 downto 1);
signal ref_toggle : std_logic;
signal di_scp : std_logic;
signal di_lp : std_logic;
......@@ -606,8 +606,7 @@ begin
rx_enc_err_o => phy_rx_enc_err,
rx_bitslide_o => phy_rx_bitslide,
pad_txp_o => sfp1_td_o,
pad_rxp_i => sfp1_rd_i,
dbg_tx_clk_o => dbg_tx_clk);
pad_rxp_i => sfp1_rd_i);
U_DAC_ARB : spec_serial_dac_arb
generic map (
......@@ -776,13 +775,7 @@ begin
hpv_o(7 downto 4) <= not eca_lemo_led(11 downto 8); -- ECA controls other LEDs
-- Baseboard logic analyzer (HPLA1)
hpw_io(1 downto 0) <= (others => 'Z'); -- too close to clock inputs
hpw_io(2) <= clk_ref; -- pin 17
hpw_io(3) <= clk_sys; -- pin 16
hpw_io(4) <= dbg_tx_clk; -- pin 15
hpw_io(5) <= phy_rx_rbclk; -- pin 14
hpw_io(6) <= clk_dmtd; -- pin 13
hpw_io(15 downto 7) <= (others => 'Z');
hpw_io(15 downto 0) <= (others => 'Z');
-- 20 is ground
-- Use output LEMOs in TTL mode
......@@ -792,9 +785,16 @@ begin
-- LEMO outputs
ttnim_o(8) <= ext_pps;
ttnim_o(1) <= clk_ref;
ttnim_o(7 downto 2) <= eca_lemo_led(6 downto 1);
-- Toggle ref_toggle on every rising edge of clk_ref, producing a signal at half
-- the clk_ref frequency.
ref_out : process(clk_ref) is
begin
  if rising_edge(clk_ref) then
    -- ref_toggle has no reset here, and a std_logic signal without an initial
    -- value starts as 'U' in simulation; "not 'U'" is 'U', so a plain inversion
    -- would never start toggling. Comparing against '1' sends every non-'1'
    -- state to '1', and is the same toggle as "not" for '0' and '1'.
    if ref_toggle = '1' then
      ref_toggle <= '0';
    else
      ref_toggle <= '1';
    end if;
  end if;
end process;
ttnim_o(1) <= ref_toggle;
-- ECA outputs
lvds_o <= eca_lvds_ecl(7 downto 0);
ecl_o <= eca_lvds_ecl(15 downto 8);
......
derive_pll_clocks -create_base_clocks
create_clock -period 33Mhz -name LPC_FPGA_CLK [get_ports {LPC_FPGA_CLK}]
create_clock -period 100Mhz -name pcie_refclk_i [get_ports {pcie_refclk_i}]
create_clock -period 125Mhz -name sfp2_ref_clk_i [get_ports {sfp2_ref_clk_i}]
derive_clock_uncertainty
# Cut the clock domains from each other
# Note: ref_inst|* and wr_gxb*|transmit_pcs0 are actually the same clock
# However, we cut them to enforce a synchronizer between them in the GXB.
# For the same reason, we cut the RX pcs0 and pma0 clocks.
set_clock_groups -asynchronous \
-group { altera_reserved_tck } \
-group { LPC_FPGA_CLK } \
-group { clk_20m_vcxo_i dmtd_inst|* } \
-group { clk_125m_local_i sys_inst|* } \
-group { clk_125m_pllref_i ref_inst|* } \
-group { wr_gxb*|tx_pll0|* \
wr_gxb*|ch_clk_div0|* \
wr_gxb*|transmit_pma0|* \
wr_gxb*|transmit_pcs0|* } \
-group { sfp2_ref_clk_i \
wr_gxb*|rx_cdr_pll0|* \
wr_gxb*|receive_pma0|* } \
-group { wr_gxb*|receive_pcs0|* } \
-group { pcie_refclk_i \
PCIe*|tx_pll0|* \
PCIe*|central_clk_div0|* \
PCIe*|pllfixedclk \
PCIe*|coreclkout } \
-group { PCIe*|rx_cdr_pll0|* \
PCIe*|receive_pma0|* } \
-group { PCIe*|rx_cdr_pll1|* \
PCIe*|receive_pma1|* } \
-group { PCIe*|rx_cdr_pll2|* \
PCIe*|receive_pma2|* } \
-group { PCIe*|rx_cdr_pll3|* \
PCIe*|receive_pma3|* }
# Timing constraints: derive the PLL clocks, declare the LPC, PCIe and SFP
# reference clocks, then declare which clock domains are unrelated to each other.
derive_pll_clocks -create_base_clocks
create_clock -period 33Mhz -name LPC_FPGA_CLK [get_ports {LPC_FPGA_CLK}]
create_clock -period 100Mhz -name pcie_refclk_i [get_ports {pcie_refclk_i}]
create_clock -period 125Mhz -name sfp2_ref_clk_i [get_ports {sfp2_ref_clk_i}]
derive_clock_uncertainty
# Cut the clock domains from each other
# Each -group is cut from every other -group; clocks listed inside the same
# group remain timed against each other. The GXB transmit clocks share a group
# with clk_125m_pllref_i / ref_inst|*, and receive_pcs0 shares a group with the
# RX CDR / receive_pma0 clocks, so transfers between them are analysed.
# Each PCIe receive lane (rx_cdr_pll0..3 / receive_pma0..3) is its own group.
set_clock_groups -asynchronous \
-group { altera_reserved_tck } \
-group { LPC_FPGA_CLK } \
-group { clk_20m_vcxo_i dmtd_inst|* } \
-group { clk_125m_local_i sys_inst|* } \
-group { clk_125m_pllref_i ref_inst|* \
wr_gxb*|tx_pll0|* \
wr_gxb*|ch_clk_div0|* \
wr_gxb*|transmit_pma0|* \
wr_gxb*|transmit_pcs0|* } \
-group { sfp2_ref_clk_i \
wr_gxb*|rx_cdr_pll0|* \
wr_gxb*|receive_pma0|* \
wr_gxb*|receive_pcs0|* } \
-group { pcie_refclk_i \
PCIe*|tx_pll0|* \
PCIe*|central_clk_div0|* \
PCIe*|pllfixedclk \
PCIe*|coreclkout } \
-group { PCIe*|rx_cdr_pll0|* \
PCIe*|receive_pma0|* } \
-group { PCIe*|rx_cdr_pll1|* \
PCIe*|receive_pma1|* } \
-group { PCIe*|rx_cdr_pll2|* \
PCIe*|receive_pma2|* } \
-group { PCIe*|rx_cdr_pll3|* \
PCIe*|receive_pma3|* }
......@@ -253,7 +253,6 @@ architecture rtl of scu_top is
signal phy_rx_bitslide : std_logic_vector(3 downto 0);
signal phy_rst : std_logic;
signal phy_loopen : std_logic;
signal dbg_tx_clk : std_logic;
signal wrc_master_i : t_wishbone_master_in;
signal wrc_master_o : t_wishbone_master_out;
......@@ -462,8 +461,7 @@ begin
rx_enc_err_o => phy_rx_enc_err,
rx_bitslide_o => phy_rx_bitslide,
pad_txp_o => sfp2_txp_o,
pad_rxp_i => sfp2_rxp_i,
dbg_tx_clk_o => dbg_tx_clk);
pad_rxp_i => sfp2_rxp_i);
U_DAC_ARB : spec_serial_dac_arb
generic map (
......@@ -618,11 +616,12 @@ begin
leds_o(2) <= not eca_gpio(2);
leds_o(3) <= not eca_gpio(3);
hpla_ch(0) <= clk_ref;
hpla_ch(1) <= clk_sys;
hpla_ch(2) <= dbg_tx_clk;
hpla_ch(3) <= phy_rx_rbclk;
hpla_ch(4) <= clk_dmtd;
-- hpla_ch(0) <= clk_ref;
-- hpla_ch(1) <= clk_sys;
-- hpla_ch(2) <= dbg_tx_clk;
-- hpla_ch(3) <= phy_rx_rbclk;
-- hpla_ch(4) <= clk_dmtd;
hpla_ch <= (others => 'Z');
A_SysClock <= clk_scubus;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment