Commit 591f2efd authored by Grzegorz Daniluk's avatar Grzegorz Daniluk Committed by Alessandro Rubini

spec: update endpoint driver

parent ea97d5be
/*
* Copyright 2011 Tomasz Wlostowski <tomasz.wlostowski@cern.ch> for CERN
* Modified by Alessandro Rubini for ptp-proposal/proto
*
* GNU LGPL 2.1 or later versions
*/
#include <pptp/pptp.h>
#include "../spec.h"
#include "syscon.h"
#include <endpoint.h>
#include <hw/endpoint_regs.h>
#include <hw/endpoint_mdio.h>
#define UIS_PER_SERIAL_BIT 800
static int autoneg_enabled;
/* Length of a single bit on the gigabit serial link in picoseconds. Used for calculating deltaRx/deltaTx
from the serdes bitslip value */
#define PICOS_PER_SERIAL_BIT 800
static volatile struct EP_WB *EP = (volatile struct EP_WB *) BASE_EP;
/* Number of raw phase samples averaged by the DMTD detector in the Endpoint during single phase measurement.
The bigger, the better precision, but slower rate */
#define DMTD_AVG_SAMPLES 256
/* functions for accessing PCS registers */
static int autoneg_enabled;
static volatile struct EP_WB *EP = (volatile struct EP_WB *) BASE_EP;
/* functions for accessing PCS (MDIO) registers */
static uint16_t pcs_read(int location)
{
EP->MDIO_CR = EP_MDIO_CR_ADDR_W(location >> 2);
while ((EP->MDIO_SR & EP_MDIO_SR_READY) == 0);
return EP_MDIO_SR_RDATA_R(EP->MDIO_SR) & 0xffff;
while ((EP->MDIO_ASR & EP_MDIO_ASR_READY) == 0);
return EP_MDIO_ASR_RDATA_R(EP->MDIO_ASR) & 0xffff;
}
static void pcs_write(int location,
int value)
static void pcs_write(int location, int value)
{
EP->MDIO_CR = EP_MDIO_CR_ADDR_W(location >> 2)
| EP_MDIO_CR_DATA_W(value)
| EP_MDIO_CR_RW;
while ((EP->MDIO_SR & EP_MDIO_SR_READY) == 0);
while ((EP->MDIO_ASR & EP_MDIO_ASR_READY) == 0);
}
/* MAC address setting */
static void set_mac_addr(uint8_t dev_addr[])
{
EP->MACL = ((uint32_t)dev_addr[2] << 24)
......@@ -57,18 +63,25 @@ void get_mac_addr(uint8_t dev_addr[])
}
/* Initializes the endpoint and sets its local MAC address */
void ep_init(uint8_t mac_addr[])
{
set_mac_addr(mac_addr);
EP->ECR = 0;
*(unsigned int *)(0x62000) = 0x2; // reset network stuff (cleanup required!)
*(unsigned int *)(0x62000) = 0;
EP->ECR = 0; /* disable Endpoint */
EP->VCR0 = EP_VCR0_QMODE_W(3); /* disable VLAN unit - not used by WRPC */
EP->RFCR = EP_RFCR_MRU_W(1518); /* Set the max RX packet size */
EP->TSCR = EP_TSCR_EN_TXTS | EP_TSCR_EN_RXTS; /* Enable timestamping */
/* Configure DMTD phase tracking */
EP->DMCR = EP_DMCR_EN | EP_DMCR_N_AVG_W(DMTD_AVG_SAMPLES);
EP->RFCR = 3 << EP_RFCR_QMODE_SHIFT;
EP->TSCR = EP_TSCR_EN_TXTS | EP_TSCR_EN_RXTS;
EP->FCR = 0;
}
/* Enables/disables transmission and reception. When autoneg is set to 1,
starts up 802.3 autonegotiation process */
int ep_enable(int enabled, int autoneg)
{
uint16_t mcr;
......@@ -79,28 +92,38 @@ int ep_enable(int enabled, int autoneg)
return 0;
}
EP->ECR = EP_ECR_TX_EN_FRA | EP_ECR_RX_EN_FRA | EP_ECR_RST_CNT;
/* Disable the endpoint */
EP->ECR = 0;
/* Load default packet classifier rules - see ep_pfilter.c for details */
pfilter_init_default();
/* Enable TX/RX paths, reset RMON counters */
EP->ECR = EP_ECR_TX_EN | EP_ECR_RX_EN | EP_ECR_RST_CNT;
autoneg_enabled = autoneg;
#if 1
/* Reset the GTP Transceiver - it's important to do the GTP phase alignment every time
we start up the software, otherwise the calibration RX/TX deltas may not be correct */
pcs_write(MDIO_REG_MCR, MDIO_MCR_PDOWN); /* reset the PHY */
spec_udelay(2000 * 1000);
spec_udelay(200);
pcs_write(MDIO_REG_MCR, MDIO_MCR_RESET); /* reset the PHY */
pcs_write(MDIO_REG_MCR, 0); /* reset the PHY */
// pcs_write(MDIO_REG_MCR, MDIO_MCR_RESET); /* reset the PHY */
#endif
/* Don't advertise anything - we don't want flow control */
pcs_write(MDIO_REG_ADVERTISE, 0);
mcr = MDIO_MCR_SPEED1000_MASK | MDIO_MCR_FULLDPLX_MASK;
if(autoneg)
mcr |= MDIO_MCR_ANENABLE | MDIO_MCR_ANRESTART;
pcs_write(MDIO_REG_MCR, mcr);
return 0;
}
int ep_link_up()
/* Checks the link status. If the link is up, returns non-zero
and stores the Link Partner Ability (LPA) autonegotiation register at *lpa */
int ep_link_up(uint16_t *lpa)
{
uint16_t flags = MDIO_MSR_LSTATUS;
volatile uint16_t msr;
......@@ -108,25 +131,26 @@ int ep_link_up()
if(autoneg_enabled)
flags |= MDIO_MSR_ANEGCOMPLETE;
msr = pcs_read(MDIO_REG_MSR);
msr = pcs_read(MDIO_REG_MSR);
msr = pcs_read(MDIO_REG_MSR); /* Read this flag twice to make sure the status is updated */
if(lpa) *lpa = pcs_read(MDIO_REG_LPA);
return (msr & flags) == flags ? 1 : 0;
}
/* Returns the TX/RX latencies. They are valid only when the link is up. */
int ep_get_deltas(uint32_t *delta_tx, uint32_t *delta_rx)
{
// mprintf("called ep_get_deltas()\n");
/* fixme: these values should be stored in calibration block in the EEPROM on the FMC. Also, the TX/RX delays of a particular SFP
should be added here */
*delta_tx = 0;
*delta_rx = 15000 - 7000 + 195000 + 32000
+ UIS_PER_SERIAL_BIT
* MDIO_WR_SPEC_BSLIDE_R(pcs_read(MDIO_REG_WR_SPEC))
+ 2800 - 9000;
*delta_rx = 15000 - 7000 + 195000 + 32000 + PICOS_PER_SERIAL_BIT * MDIO_WR_SPEC_BSLIDE_R(pcs_read(MDIO_REG_WR_SPEC)) + 2800 - 9000 - 40000 + 2700;
return 0;
}
/* Prints out the RMON statistic counters */
void ep_show_counters()
{
int i;
......@@ -137,6 +161,7 @@ void ep_show_counters()
int ep_get_psval(int32_t *psval)
{
uint32_t val;
val = EP->DMSR;
if(val & EP_DMSR_PS_RDY)
......@@ -144,7 +169,7 @@ int ep_get_psval(int32_t *psval)
else
*psval = 0;
return val & EP_DMSR_PS_RDY;
return val & EP_DMSR_PS_RDY ? 1 : 0;
}
int ep_cal_pattern_enable()
......
/* Endpoint Packet Filter/Classifier driver
A little explanation: The WR core needs to classify the incoming packets into
two (or more categories):
- PTP, ARP, DHCP packets, which should go to the WRCore CPU packet queue (mini-nic)
- Other packets matching user's provided pattern, which shall go to the external fabric
port - for example to Etherbone, host network controller, etc.
- packets to be dropped (used neither by the WR Core or the user application)
WR Endpoint (WR MAC) inside the WR Core therefore contains a simple microprogrammable
packet filter/classifier. The classifier processes the incoming packet, and assigns it
to one of 8 classes (an 8-bit word, where each bit corresponds to a particular class) or
eventually drops it. Hardware implementation of the unit is a simple VLIW processor with
32 single-bit registers (0 - 31). The registers are organized as follows:
- 0: don't touch (always 0)
- 1 - 22: general purpose registers
- 23: drop packet flag: if 1 at the end of the packet processing, the packet will be dropped.
- 24..31: packet class (class 0 = reg 24, class 7 = reg 31).
Program memory has 64 36-bit words. Packet filtering program is restarted every time a new packet comes.
There are 5 possible instructions:
1. CMP offset, value, mask, oper, Rd:
------------------------------------------
* Rd = Rd oper ((((uint16_t *)packet) [offset] & mask) == value)
Examples:
* CMP 3, 0xcafe, 0xffff, MOV, Rd
will compare the 3rd word of the packet (bytes 6, 7) against 0xcafe and if the words are equal,
1 will be written to Rd register.
* CMP 4, 0xbabe, 0xffff, AND, Rd
will do the same with the 4th word and write to Rd its previous value ANDed with the result
of the comparison. Effectively, Rd now will be 1 only if bytes [6..9] of the payload contain word
0xcafebabe.
Note that the mask value is nibble-granular. That means you can choose a particular
set of nibbles within a word to be compared, but not an arbitrary set of bits (e.g. 0xf00f, 0xff00
and 0xf0f0 masks are ok, but 0x8001 is wrong.
2. BTST offset, bit_number, oper, Rd
------------------------------------------
* Rd = Rd oper (((uint16_t *)packet) [offset] & (1<<bit_number) ? 1 : 0)
Examples:
* BTST 3, 10, MOV, 11
will write 1 to reg 11 if the 10th bit in the 3rd word of the packet is set (and 0 if it's clear)
3. Logic opearations:
-----------------------------------------
* LOGIC2 Rd, Ra, OPER Rb - 2 argument logic (Rd = Ra OPER Rb). If the operation is MOV or NOT, Ra is
taken as the source register.
* LOGIC3 Rd, Ra, OPER Rb, OPER2, Rc - 3 argument logic Rd = (Ra OPER Rb) OPER2 Rc.
4. Misc
-----------------------------------------
FIN instruction terminates the program.
NOP executes a dummy instruction (LOGIC2 0, 0, AND, 0)
IMPORTANT:
- the program counter is advanved each time a 16-bit words of the packet arrives.
- the CPU doesn't have any interlocks to simplify the HW, so you can't compare the
10th word when PC = 2. Max comparison offset is always equal to the address of the instruction.
- Code may contain up to 64 operations, but it must classify shorter packets faster than in
32 instructions (there's no flow throttling)
*/
//#include <stdio.h>
#include "../spec.h"
#include <endpoint.h>
#include <hw/endpoint_regs.h>
#define PFILTER_MAX_CODE_SIZE 32
#define pfilter_dbg
static volatile struct EP_WB *EP = (volatile struct EP_WB *) BASE_EP;
static const uint64_t PF_MODE_LOGIC = (1ULL<<34);
static const uint64_t PF_MODE_CMP = 0ULL;
static int code_pos;
static uint64_t code_buf[32];
/* begins assembling a new packet filter program */
void pfilter_new()
{
code_pos = 0;
}
static void check_size()
{
if(code_pos == PFILTER_MAX_CODE_SIZE - 1)
{
pfilter_dbg("microcode: code too big (max size: %d)\n", PFILTER_MAX_CODE_SIZE);
}
}
static void check_reg_range(int val, int minval, int maxval, char *name)
{
if(val < minval || val > maxval)
{
pfilter_dbg("microcode: %s register out of range (%d to %d)", name, minval,maxval);
}
}
void pfilter_cmp(int offset, int value, int mask, pfilter_op_t op, int rd)
{
uint64_t ir;
check_size();
if(offset > code_pos)
pfilter_dbg("microcode: comparison offset is bigger than current PC. Insert some nops before comparing");
check_reg_range(rd, 1, 15, "ra/rd");
ir = (PF_MODE_CMP | ((uint64_t)offset << 7)
| ((mask & 0x1) ? (1ULL<<29) : 0)
| ((mask & 0x10) ? (1ULL<<30) : 0)
| ((mask & 0x100) ? (1ULL<<31) : 0)
| ((mask & 0x1000) ? (1ULL<<32) : 0))
| op | (rd << 3);
ir = ir | ((uint64_t)value & 0xffffULL) << 13;
code_buf[code_pos++] = ir;
}
// rd = (packet[offset] & (1<<bit_index)) op rd
void pfilter_btst(int offset, int bit_index, pfilter_op_t op, int rd)
{
uint64_t ir;
check_size();
if(offset > code_pos)
pfilter_dbg("microcode: comparison offset is bigger than current PC. Insert some nops before comparing");
check_reg_range(rd, 1, 15, "ra/rd");
check_reg_range(bit_index, 0, 15, "bit index");
ir = ((1ULL<<33) | PF_MODE_CMP | ((uint64_t)offset << 7) | ((uint64_t)bit_index << 29) | (uint64_t)op | ((uint64_t)rd << 3));
code_buf[code_pos++] = ir;
}
void pfilter_nop()
{
uint64_t ir;
check_size();
ir = PF_MODE_LOGIC;
code_buf[code_pos++] = ir;
}
// rd = ra op rb
void pfilter_logic2(int rd, int ra, pfilter_op_t op, int rb)
{
uint64_t ir;
check_size();
check_reg_range(ra, 0, 31, "ra");
check_reg_range(rb, 0, 31, "rb");
check_reg_range(rd, 1, 31, "rd");
ir = ((uint64_t)ra << 8) | ((uint64_t)rb << 13) | (((uint64_t)rd & 0xf) << 3) | (((uint64_t)rd & 0x10) ? (1ULL<<7) : 0) | (uint64_t)op;
ir = ir | PF_MODE_LOGIC | (3ULL<<23);
code_buf[code_pos++] = ir;
}
static void pfilter_logic3(int rd, int ra, pfilter_op_t op, int rb, pfilter_op_t op2, int rc)
{
uint64_t ir;
check_size();
check_reg_range(ra, 0, 31, "ra");
check_reg_range(rb, 0, 31, "rb");
check_reg_range(rc, 0, 31, "rb");
check_reg_range(rd, 1, 31, "rd");
ir = (ra << 8) | (rb << 13) | (rc << 18) | ((rd & 0xf) << 3) | ((rd & 0x10) ? (1<<7) : 0) | op;
ir = ir | PF_MODE_LOGIC | (op2<<23);
code_buf[code_pos++] = ir;
}
/* Terminates the microcode, loads it to the endpoint and enables the pfilter */
void pfilter_load()
{
int i;
code_buf[code_pos++] = (1ULL<<35); // insert FIN instruction
EP->PFCR0 = 0; // disable pfilter
for(i=0;i<code_pos;i++)
{
uint32_t cr0, cr1;
cr1 = EP_PFCR1_MM_DATA_LSB_W(code_buf[i] & 0xfff);
cr0 = EP_PFCR0_MM_ADDR_W(i) | EP_PFCR0_MM_DATA_MSB_W(code_buf[i]>>12) | EP_PFCR0_MM_WRITE_MASK;
EP->PFCR1 = cr1;
EP->PFCR0 = cr0;
}
EP->PFCR0 = EP_PFCR0_ENABLE;
}
/* sample packet filter initialization:
- redirects broadcasts and PTP packets to the WR Core
- redirects unicasts addressed to self with ethertype 0xa0a0 to the external fabric */
#define R_CLASS(x) (24 + x)
#define R_DROP 23
void pfilter_init_default()
{
pfilter_new();
pfilter_nop();
pfilter_cmp(0, 0xffff, 0xffff, MOV, 1);
pfilter_cmp(1, 0xffff, 0xffff, AND, 1);
pfilter_cmp(2, 0xffff, 0xffff, AND, 1); /* r1 = 1 when dst mac is broadcast */
pfilter_cmp(0, 0x011b, 0xffff, MOV, 2);
pfilter_cmp(1, 0x1900, 0xffff, AND, 2);
pfilter_cmp(2, 0x0000, 0xffff, AND, 2); /* r2 = 1 when dst mac is PTP multicast (01:1b:19:00:00:00) */
pfilter_cmp(0, EP->MACH & 0xffff, 0xffff, MOV, 3);
pfilter_cmp(1, EP->MACL >> 16, 0xffff, AND, 3);
pfilter_cmp(2, EP->MACL & 0xffff, 0xffff, AND, 3); /* r3 = 1 when the packet is unicast to our own MAC */
pfilter_cmp(6, 0xa0a0, 0xffff, MOV, 4); /* r4 = 1 when ethertype = 0xa0a0 */
pfilter_cmp(6, 0x88f7, 0xffff, MOV, 5); /* r5 = 1 when ethertype = PTPv2 */
pfilter_logic3(7, 3, AND, 4, OR, 5); /* r5 = PTP or etherbone */
pfilter_logic2(R_DROP, 7, NOT, 0); /* Neither PTP or etherbone? drop */
pfilter_logic2(R_CLASS(7), 3, AND, 4); // class 7: minibone unicasts
pfilter_logic2(R_CLASS(0), 5, MOV, 0); // class 7: minibone unicasts
pfilter_load();
}
#ifndef __ENDPOINT_H
#define __ENDPOINT_H
#define DMTD_AVG_SAMPLES 256
#define DMTD_MAX_PHASE 16384
typedef enum {
AND=0,
NAND=4,
OR=1,
NOR=5,
XOR=2,
XNOR=6,
MOV=3,
NOT=7
} pfilter_op_t;
void ep_init(uint8_t mac_addr[]);
void get_mac_addr(uint8_t dev_addr[]);
int ep_enable(int enabled, int autoneg);
int ep_link_up();
int ep_get_deltas(uint32_t *delta_tx, uint32_t *delta_rx);
int ep_get_psval(int32_t *psval);
int ep_cal_pattern_enable();
int ep_cal_pattern_disable();
void pfilter_new();
void pfilter_cmp(int offset, int value, int mask, pfilter_op_t op, int rd);
void pfilter_btst(int offset, int bit_index, pfilter_op_t op, int rd);
void pfilter_nop();
void pfilter_logic2(int rd, int ra, pfilter_op_t op, int rb);
static void pfilter_logic3(int rd, int ra, pfilter_op_t op, int rb, pfilter_op_t op2, int rc);
void pfilter_load();
void pfilter_init_default();
#endif
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment