Commit 83c8618b authored by Maciej Lipinski's avatar Maciej Lipinski

ISPCS2014 article about PPSi

parent a3fa84be
# Build rules for the PPSi article: `make` typesets ppsi.pdf from ppsi.tex,
# `make clean` removes build products from the current directory.

# Default goal: the typeset paper.
all : ppsi.pdf
# `all` and `clean` name actions, not files, so they are declared phony.
.PHONY : all clean
# Typeset sequence: pdflatex once to write the .aux file, bibtex to build the
# bibliography, then pdflatex twice more to resolve citations and references.
# `$^` expands to the prerequisite list (here: ppsi.tex).
# NOTE(review): the -dPDFSETTINGS/-dSubsetFonts/-dEmbedAllFonts/-dMaxSubsetPct/
# -dCompatibilityLevel switches look like Ghostscript options rather than
# pdflatex ones -- confirm that the installed pdflatex accepts them.
ppsi.pdf : ppsi.tex
pdflatex -dPDFSETTINGS=/prepress -dSubsetFonts=true -dEmbedAllFonts=true -dMaxSubsetPct=100 -dCompatibilityLevel=1.4 $^
bibtex ppsi
pdflatex -dPDFSETTINGS=/prepress -dSubsetFonts=true -dEmbedAllFonts=true -dMaxSubsetPct=100 -dCompatibilityLevel=1.4 $^
pdflatex -dPDFSETTINGS=/prepress -dSubsetFonts=true -dEmbedAllFonts=true -dMaxSubsetPct=100 -dCompatibilityLevel=1.4 $^
# Remove generated files and editor backups from the current directory.
# NOTE(review): the *.pdf and *.eps globs match every such file in this
# directory, not only generated ones -- confirm no source figures live here.
clean :
# $(MAKE) clean -C fig
rm -f *.eps *.pdf *.dat *.log *.out *.aux *.dvi *.ps *~ *.blg *.bbl
\ No newline at end of file
@electronic{wr,
title = "{White Rabbit}",
howpublished = {\url{http://www.ohwr.org/projects/white-rabbit}}
}
@article{icalepcs2011,
author = "M. Lipi\'{n}ski and J. Serrano and T. W\l{}ostowski and C. Prados",
title = "{Reliability In A White Rabbit Network}",
journal = "Proceedings of ICALEPCS",
address = "Grenoble, France",
year = "2011",
}
@standard{ieee8023,
title = "IEEE Standard for
Information Technology--Telecommunications and Information Exchange Between
Systems--Local and Metropolitan Area Networks--Specific Requirements Part 3:
Carrier Sense Multiple Access With Collision Detection (CSMA/CD) Access Method
and Physical Layer Specifications - Section Three",
year = "2008",
organization = "IEEE",
address = "New York",
number = "802.3-2008",
}
@standard{synce,
title = "Timing characteristics of a synchronous Ethernet equipment slave clock {(EEC)}",
year = "2007",
number = "G.8262",
organization = "ITU-T",
}
@standard{ieee1588,
title = "IEEE Standard for a Precision
Clock Synchronization Protocol for Networked Measurement and Control Systems",
organization = "IEEE",
address = "New York",
number = "1588-2008",
year = "2008",
}
@article{ispcs2011,
author = "M. Lipi\'{n}ski and T. W\l{}ostowski and J. Serrano and P. Alvarez",
title = "{White Rabbit: a PTP application for robust sub-nanosecond synchronization}",
journal = "Proceedings of ISPCS",
address = "Munich, Germany",
year = "2011",
}
@Misc{wrdraft,
author = "E.G. Cota and M. Lipi\'{n}ski and T. W\l{}ostowski and E.V.D. Bij and J. Serrano",
title = "{White Rabbit Specification: Draft for Comments}",
note = "v2.0",
month = "july",
year = "2011",
howpublished = {\url{www.ohwr.org/documents/21}}
}
@mastersthesis{tom,
author = "T. W\l{}ostowski",
title = "Precise time and frequency transfer in a {White} {Rabbit} network",
month = "may",
year = "2011",
school = "Warsaw University of Technology",
howpublished = {\url{http://www.ohwr.org/documents/80}}
}
@Inproceedings{wrproject,
author = "J. Serrano and P. Alvarez and M. Cattin and E. G. Cota and J. H. Lewis and P.
Moreira and T. W\l{}ostowski and others",
title = "{The White Rabbit Project}",
booktitle = "Proceedings of ICALEPCS TUC004",
address = "Kobe, Japan",
year = "2009",
}
@article{ddmtd,
author = "P. Moreira and P. Alvarez and J. Serrano and I. Darwazeh and T. W\l{}ostowski",
title = "{Digital Dual Mixer Time Difference for Sub-Nanosecond Time Synchronization in Ethernet}",
journal = "Frequency Control Symposium (FCS), 2010 IEEE International",
address = "London, UK",
year = "2010",
}
@electronic{ppsimanual,
title = "{PPSi Manual}",
howpublished = {\\\url{www.ohwr.org/attachments/download/1952/ppsi-manual-130311.pdf}}
}
@electronic{ptpd,
title = "{PTPd project}",
howpublished = {\url{www.ptpd.sourceforge.net}}
}
@electronic{P1588WG,
title = "{IEEE P1588 Working Group}",
howpublished = {\url{ieee-sa.centraldesktop.com/1588public}}
}
@electronic{ptp-proposal,
title = "{White Rabbit PTP Proposal}",
howpublished = {\url{www.ohwr.org/documents/92}}
}
@electronic{gladstone,
title = "{White Rabbit Developers Mailing List}",
howpublished = {\\\url{lists.ohwr.org/sympa/arc/white-rabbit-dev/2014-03/msg00015.html}}
}
@electronic{spec,
title = "{Simple PCIe FMC carrier (SPEC)}",
howpublished = {\\\url{www.ohwr.org/projects/spec}}
}
@electronic{svec,
title = "{Simple VME FMC Carrier (SVEC)}",
howpublished = {\\\url{www.ohwr.org/projects/svec}}
}
@electronic{lgpl,
title = "{GNU Lesser General Public License}",
howpublished = {\\\url{www.gnu.org/licenses/lgpl.html}}
}
@article{better,
author = "Alessandro Rubini",
title = "{PPSi}",
journal = "Better Embedded Conference",
address = "Firenze, Italia",
month = "July",
year = "2013",
howpublished = {\\\url{www.betterembedded.it/conference/talks/ppsi-ptp-ported-to-silicon}}
}
@article{IBIC2013,
author = "J. Serrano and M. Cattin and E. Gousiou and E. van der Bij and T. W\l{}ostowski and G. Daniluk and M. Lipi\'{n}ski",
title = "{THE WHITE RABBIT PROJECT}",
journal = "Proceedings of IBIC2013",
address = "Oxford, UK",
year = "2013",
}
@electronic{wrpc,
title = "{White Rabbit PTP Core}",
howpublished = {\\\url{www.ohwr.org/projects/wr-cores/wiki/Wrpc\_core}}
}
@electronic{ppsi-repo,
title = "{PPSi's Public Git Repository}",
howpublished = {\\\url{git://ohwr.org/white-rabbit/ppsi.git}}
}
\documentclass[conference]{IEEEtran}
\usepackage{listings}
\lstset{basicstyle=\small\ttfamily,frame=lines,captionpos=b}
\usepackage{tikz}
% *** Do not adjust lengths that control margins, column widths, etc. ***
% *** Do not use packages that alter fonts (such as pslatex). ***
% There should be no need to do such things with IEEEtran.cls V1.6 and later.
% (Unless specifically asked to do so by the journal or conference you plan
% to submit to, of course. )
\graphicspath{ {../../figures/} }
\begin{document}
% paper title
% can use linebreaks \\ within to get better formatting as desired
\title{PPSi -- A Free Software PTP Implementation}
% author names and affiliations
% use a multiple column layout for up to three different
% affiliations
\author{\IEEEauthorblockN{Pietro Fezzardi}
\IEEEauthorblockA{Universit\`{a} degli Studi di Pavia\\
Pavia, Italy}
\and
\IEEEauthorblockN{Maciej Lipi\'{n}ski}
\IEEEauthorblockA{CERN, Geneva, Switzerland\\Warsaw Univ. of Tech., Poland}
\and
\IEEEauthorblockN{Alessandro Rubini}
\IEEEauthorblockA{Independent Consultant\\Pavia, Italy}
\and
\IEEEauthorblockN{Aurelio Colosimo}
\IEEEauthorblockA{Independent Consultant\\Milano, Italy}}
% make the title area
\maketitle
\begin{abstract}
%\boldmath
This paper describes a new open source implementation of the Precision Time
Protocol (PTP) \cite{ieee1588} called PTP Ported to Silicon (PPSi)
\cite{ppsimanual}. It was developed to fill in a niche in the free software
world for a PTP daemon that is easily portable to a wide range of
architectures and highly modular to enable protocol extensions --- two key
requirements of its driving force, the White Rabbit (WR) Project \cite{wr}
\cite{IBIC2013}. PPSi's core protocol
code is common for all the supported architectures ranging from a Linux PC to
a soft--core processor running in a Field Programmable Gate Array (FPGA) ---
a feature minimizing code duplication, easing debugging, and facilitating
new developments. This paper gives an overview of PPSi's internals describing
design choices as well as the means of achieving portability and extensibility.
A detailed example of a simulator architecture proves the design advantages.
With an increasing number of supported architectures and a wide use in
WR networks, PPSi is becoming an appealing PTP implementation also outside of
the White Rabbit Community.
\end{abstract}
% Improved clock synchronization accuracy through optimized servo parametrization
\section{Introduction}
%ML: I guess that you will use all this abbreviations... so their introduction will
%move to the "abstract part" but for now, you need to introduce and reference
%new terminology/abbreviations the first time you mention
PTP Ported to Silicon (PPSi) \cite{ppsimanual} is a portable Precision Time
Protocol (PTP) \cite{ieee1588} implementation developed for
the White Rabbit Project \cite{wr} \cite{IBIC2013} and licensed under the GNU Lesser
General Public License (LGPL) \cite{lgpl}.
White Rabbit \cite{ispcs2011} (WR) is an emerging technology designed for
high--accuracy synchronization and based on existing standards.
%not requiring a dedicated network. %ML technically speaking, it requires
%dedicated network, i.e. WR switches and single-fibers....
WR can be used in networks spanning several kilometers to synchronize thousands
of nodes with sub--nanosecond accuracy and tens--of--picoseconds precision
(mean offset and standard deviation, respectively). At
the same time WR can guarantee deterministic and reliable data delivery
with low latency, without affecting synchronization \cite{icalepcs2011}.
The development of WR started at CERN as an effort to design the next
generation replacement for the existing timing and control system of the
accelerator facilities. Today, it has grown far beyond that: many
research centers and private companies are taking part in WR development or
considering its adoption.
WR is based on well known and existing standards and ideas, namely Ethernet
(IEEE 802.3) \cite{ieee8023},
%Synchronous Ethernet (SyncE) \cite{synce} % we refrain from saying we use SyncE,
% this is because we don't even try to
% follow all the SyncE specs - we rather
% the same idea as SyncE
L1 syntonization (used also by ITU--T \cite{synce}) and the Precision Time Protocol
(IEEE1588)~\cite{ieee1588}, offering many potential scientific and commercial
applications. A considerable effort was made to enhance the PTP protocol
to achieve sub--nanosecond accuracy of synchronization while keeping it compatible
with the standard and inter--operable with other implementations.
WR defines a PTP
Profile for high--accuracy applications, extending the protocol to what is
called WRPTP \cite{ispcs2011}\cite{wrdraft}. The mechanisms used in this profile
are now being evaluated by the IEEE P1588 Working Group~\cite{P1588WG} for
adoption in the next revision of the standard.
%ML: I commented out the following sentence out of two reasons:
%1) it is unexpected here, somehow out of blue
%2) it is incomplete or superficial - DDMTD and loopback would need to be mentioned
% as well.
%if need be (of more text), I would this could be extended into a paragraph of few
%sentences
% To achieve high accuracy WRPTP uses L1 syntonization to
% propagate frequency, so the slave's PLL locks to the master's frequency.
% After synthonization, dedicated WR hardware is able to timestamp
% frames with a resolution of a few picoseconds
% \cite{ddmtd}\cite{wrproject}\cite{tom}.
PPSi is the result of a development effort aimed at providing full support for
the newly defined WRPTP extension. The daemon is required to work in diverse
environments, both hosted (e.g. the Linux--based WR switch) and freestanding
(e.g. the WR
end node with no operating system). This is achieved by using
a modular design that separates the PTP protocol code from the
required interactions with the specific run time environment.
This approach eases further porting to new architectures and adding support for
other PTP profiles.
%ML: not sure the below part is relevant
%PF: I agree, and it's also stated a few lines above, so I remove it
% while being inter--operable with PTP and WRPTP devices.
In this paper we give a general overview of PPSi (section~\ref{overview}),
focusing on the modular design and stressing the technical choices aimed at
portability (sections~\ref{ppsidesign} and~\ref{internals}). In
section~\ref{supportedarch} we list and describe briefly all the architectures
currently supported by PPSi. Finally, in section~\ref{simulator} we present
an example of how PPSi's portable design was used to implement a
self--test environment for performance tuning of the non--WR servo parameters;
this simulation is supplemented with field--test results in section~\ref{field-tests}.
The discussion covers release 2014.07 of the code base, freely downloadable and
distributable, that can be found in the public git repository of the
project \cite{ppsi-repo}.
For a better comprehension of the text, some definitions are here provided:
a \textit{physical port} is the hardware performing the physical connection;
a \textit{PTP port} is a port as defined in the IEEE1588 protocol, i.e.
a virtual input receiving PTP protocol packets; by instantiating a different data structure
(a \textit{PPSi instance}) for each PTP port, PPSi supports more than one PTP
port running on the same physical port, e.g. one using UDP and another one
on a raw Ethernet channel.
\section{PPSi Overview}
\label{overview}
PPSi is the PTP daemon that currently drives White Rabbit networks.
%PF: I moved this part at the beginning of the section, so we firstly talk about
%more general things, like the origins and the license. then we start go in some
%details and supported features, so it is more streightforward to pass to the
%next section, where the design choices to support the features are explained
It is written in the C language to be able to support size--constrained
environments, such as microcontrollers. The code base started
as a completely new design based on modern programming techniques;
we circulated an initial proof--of--concept document and prototype
code in WR development circles \cite{ptp-proposal},
and after an approval the actual development started in
December 2011. At the outset, PTP protocol code came from the
PTPd project \cite{ptpd}, the main working implementation
available as free software when the White Rabbit project started. Later
we migrated WR--PTPd to the new PPSi architecture, with a completely different
design aimed at portability and modularity. In the process, we followed an evolutionary
rather than revolutionary approach, to be able to quickly identify
any regressions. Currently, very little remains of the
original code base.
% \begin{figure*}[!t]
% \centering
% \includegraphics[width=0.7\textwidth]{ppsi_arch.eps}
% \caption{PPSi architecture.}
% \label{fig:ppsiArch}
% \end{figure*}
\begin{figure}[!t]
\centering
\includegraphics[width=0.49\textwidth]{PPSi/ppsi_arch-1col.jpg}
\caption{PPSi architecture.}
\label{fig:ppsiArch}
\end{figure}
The source code is distributed under the GNU LGPL, which respects the
original PTPd licensing terms, ensures the program's core will remain
available to anyone, and allows support for new architectures even when
the respective vendor does not want to disclose its own timestamping mechanisms.
PPSi can operate as
an Ordinary Clock (OC) or as a Boundary Clock (BC) handling multiple ports.
%ML: myself-check
% with support for fall--back masters in redundant networks.
It supports PTP over both raw Ethernet and UDP, though currently only IPv4.
The user can configure
different working modes for each port; PPSi supports several PTP
ports on a single physical port which renders it useful to support both Ethernet
and UDP mappings simultaneously.
PPSi can be built for a number of different \textit{architectures}, where
the architecture code defines all interactions of the protocol code with
the specific run time environment. Another major build--time option
is a choice of the supported extensions, i.e. whether or not to include WRPTP.
%ML: below, please correct me if the changes make the sentence/meaning invalid
%PF: I commented out the following lines, because they are repeated later. I
% kept the next occurence because it's right after kconfig, so we discuss
% first about build-time config, then about run-time config.
% Run--time configuration is supported using ``configuration strings'' where
% each architecture code is responsible for providing such strings from any
% input, e.g. the hosted build supports two: configuration files and command
% line arguments.
More fine--grained configurations can be selected at run time.
Diagnostics in PPSi provide different loglevels for each port and
each code subsystem (e.g.: state machine, servo loop, frame I/O) that
can be changed at run time.
%PF: again configuration strings. I removed them since they are better explained
%later.
%by means of proper configuration strings.
Each architecture provides the means for delivering diagnostics to the user.
Frame diagnostics can go up to supporting complete dumps of sent and received
data items --- a feature which proved useful when dealing with links between
two WR nodes where no network sniffer is available.
%AR: Sorry, maybe I overdid with diagnostics?
\section{PPSi Design Choices}
\label{ppsidesign}
The main design goal of PPSi, which differentiates it from other free
software implementations on the market, is being self--contained.
%ML: below, I found the fragment hard to understand. I did some modifications
% in an attempt to make it clearer - not sure these make sense.
% it seems that a figure illustrating this could be helpful, alternatively
% rewriting. I can do such figure if you send me hand-made sketch
The
top--level \textit{Makefile} builds an object file, \texttt{\small ppsi.o},
that looks like a library to the calling code; the object file contains
architecture--independent protocol code while the calling code is
architecture--specific, as depicted in Figure~\ref{fig:ppsiArch}.
After initial setup, the \textit{main} function
of the calling code refers to a single entry point to the protocol code,
\texttt{\small pp\_state\_machine()} --- a procedure that operates synchronously
and returns immediately: the caller passes a network frame as argument to the
procedure, if available, and receives back a delay value to wait
before calling the procedure again.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Portability is thus achieved by
removing any interaction with the outside world from the core of PPSi.
The code of PPSi can thus be divided into 3 areas depicted in
Figure~\ref{fig:ppsiArch}: the
architecture--independent PTP ``default protocol'', the ``architecture--specific'' glue
code, and profile--specific ``protocol extension'', such as WRPTP. The
architecture--specific code provides timing and frame I/O capabilities by
means of object methods associated with the PTP port (called \texttt{\small ppi},
PPSi instance). By filling in \texttt{\small pp\_time\_operations} and
\texttt{\small pp\_network\_operations} structures, each architecture
offers a complete link between the protocol engine and its
actual run time environment (additionally, some architectures provide
a main loop, as described later). Similarly, a custom PTP profile
is implemented by providing function pointers within the
\texttt{\small pp\_ext\_hooks} structure. Using these functions, by decoding
Type--Length--Value (TLV) tuples in the announce message, profile code can
choose to further
participate in the communication and the Best Master Clock Algorithm (BMCA)
decisions, or let the default PTP stack proceed unmodified.
The architectural split of PPSi anticipates the new layering description of the
IEEE1588 standard. The media--independent layer (``default protocol'')
defines clear \textit{interfaces} with the media--dependent part (``arch--specific'')
as well as option-- and profile--specific operations (``protocol extension'').
The latter can also interface directly with the media--dependent layer, if necessary
(e.g. control of Layer 1 syntonization in WR).
Build--time configuration of the architecture and extensions %adde to link with the
%previous paragraph
is performed using Kconfig. The tool, derived
from the Linux kernel, is easily integrated into other software
packages and is being widely adopted, becoming the de--facto standard
for package configuration. Kconfig offers both interactive and
non--interactive configuration, the latter by providing a pre--built
\texttt{\small .config} file. PPSi uses the non--interactive feature to
optionally compile for all supported architectures, thus giving a
complete build--time coverage of the code, helping developers in
identifying portability issues or new unsatisfied external dependencies.
% below is repeated, I fear
% PF: indeed. I think that makes more sense to talk about it here than above.
Run--time configuration happens through ``configuration strings'' that
the run time environment feeds to PPSi. Such configuration defines
the mapping and profile of each port, clock quality, diagnostic loglevels of
different subsystems and other
variables. The configuration parser
is a part of the PPSi core to ensure its availability in all the environments.
Each of the 3 areas of PPSi can provide an array of configuration keywords, i.e.
global items, as well as architecture--specific, and profile--specific keywords.
Consequently, special needs can be addressed locally by the individual use case
avoiding global changes to a centralized parser.
The main loop, providing the calling code, is not a proper part of PPSi ---
whether or not to provide the main
loop, as well as the associated linking rule, is up to the
architecture--specific code. While hosted architectures usually
provide a main loop and a \texttt{\small main} function, a typical
freestanding implementation does not. This is because, in such an environment,
the object files for all the
applications are linked together with an externally provided main
loop, be it a Real--Time Operating System or a custom procedure to boot
the processor. For example, the ``White Rabbit PTP Core Software'' \cite{wrpc},
driving the WR node, deals with a number of management activities
while running WRPTP. The PPSi build for \texttt{\small arch-wrpc} is a
bare \texttt{\small ppsi.o} which acts as a function library within
the main program of the soft processor.
% \texttt{\small wrpc-sw}.
\section{PPSi Internals}
\label{internals}
As said, the core of PPSi is the state machine which in turn relies on the
architecture--specific network and time operations.
\subsection{State Machine}
PPSi runs one state machine for each PTP port being configured at
start--up; each port is defined by a \texttt{\small struct pp\_instance}
object (\textit{ppi}) which runs a global \texttt{\small pp\_state\_table[]},
as presented in Figure~\ref{fig:ppsiArch}. The state table associates allowed states
with functions called when the state is executed as depicted in
Listing~\ref{lst:statetabstruct}. The actual
content of the state table depends on whether PPSi is built for the
default or profile--extended protocol --- although a profile can
refer to the default state table. Being compliant with IEEE1588
is up to the actual code: PPSi forces no policy in that respect.
Actually, the implementation is a ``pure'' network--driven state
machine, with nothing PTP--specific in the engine itself.
\begin{lstlisting}[float=t,caption={Data structure for state table.},
label=lst:statetabstruct]
struct pp_state_table_item {
int state;
char *name;
pp_action *f1;
};
\end{lstlisting}
% \begin{lstlisting}[float=t,caption={Sample definition of a state
% table.},label=lst:statetab]
% struct pp_state_table_item pp_state_table[] = {
% { PPS_INITIALIZING,"init", pp_init,},
% { PPS_FAULTY, "faulty", pp_faulty,},
% %[...cut ..]
% { PPS_SLAVE, "slave", pp_slave,},
% { PPS_END_OF_TABLE,}
% };
% \end{lstlisting}
\subsection{Time operations}
One time operations structure is instantiated for each \texttt{\small pp\_instance}
(i.e. per PTP port).
The structure collects all time--related interactions between the PPSi
protocol code and its run time environment. The structure is shown in
Listing~\ref{lst:timeops}.
\begin{lstlisting}[float=t,caption={Data structure for time
operations.},label=lst:timeops]
struct pp_time_operations {
int (*get)();
int (*set)();
int (*adjust)();
int (*adjust_offset)();
int (*adjust_freq)();
int (*init_servo)();
unsigned long (*calc_timeout)();
};
\end{lstlisting}
%ML: not needed, better show a diagram with how this functions map
% to arch-specific functions
% AR: really?
Timestamps are hosted in a \texttt{\small TimeInternal}%
\footnote{We should note that \texttt{\small TimeInternal} follows a different
naming style than anything else shown so far; that's because
we chose to keep core items of WR--PTPd unchanged while redesigning the
program's architecture; the same convention applies to all data--set items,
where we follow official IEEE naming.}
structure which
includes high--precision fields, although they are only used by the WR profile.
The \texttt{\small set()} method is only used to implement time jumps when the slave
finds its time offset is too big; adjustments can be offset--based or
frequency--based, according to what the underlying architecture supports.
The servo initialization method is there mainly to ask the hardware what
the currently applied frequency correction is ---
in \texttt{\small arch-unix} this method uses
\texttt{\small adjtimex} to query the current system settings.
The retrieved value is used to prime servo parameters, whereas
other implementations reset the clock to ``no correction'' at program
startup. By using the previously--learned adjustment as its own
starting point, our servo can avoid learning the frequency error
of the individual host it runs on, a process taking several minutes.
This trick ensures a smooth behavior when the program is restarted or
is taking over servo control from NTP or another PTP implementation.
Timeouts in PPSi are purely--software entities, implemented by inline
functions that rely on the \texttt{\small calc\_timeout()} method
which must offer millisecond resolution and be monotonic.
The \texttt{\small get()} method does not return monotonic time, so
we need specific architecture support for timeouts.
\subsection{Network operations}
The network operations are shown in Listing~\ref{lst:netops}. One such
structure is instantiated for every PPSi instance: this allows an architecture
to differentiate between ports offering hardware timestamps and ports
without this capability. Moreover, in case of a device having multiple physical
ports (e.g. a BC), each port may have its own \textit{driver}, thus
handling different hardware interfaces on the same device.
At initialization time, architecture--specific code is expected
to initialize each PTP port using information in the instance of
the \texttt{\small pp\_network\_operations}
data structure. Each architecture knows whether it can support
UDP or Ethernet mapping and can pre--set the default.
Where the architecture supports both mappings, configuration
can set up two PTP ports on a single physical interface.
The \texttt{\small send()} and \texttt{\small recv()} methods are responsible
for frame I/O but are also in charge of retrieving
a timestamp for the operation. The
\texttt{\small TimeInternal} values are those used in the PTP
calculations.
\begin{lstlisting}[float=b,caption={Data structure for network operations.},
label=lst:netops]
struct pp_network_operations {
int (*init)(ppi);
int (*exit)(ppi);
int (*recv)(ppi, frame, len, *tstamp);
int (*send)(ppi, frame, len, *tstamp);
int (*check_packet)();
};
\end{lstlisting}
%ML: below, say it enables to support both UPD and IP on the same port and stuff...
%AR: where?
Finally, the \texttt{\small check\_packet()} method is used to poll
hardware for frame arrival, waiting for no more than a specified
timeout.
\subsection{PTP Profiles and Extensions}
\label{sec:hooks}
PPSi supports PTP profiles and extensions by means of protocol ``hooks'' and
a customized state machine table, where needed (``Protocol extension'' in
Figure~\ref{fig:ppsiArch}). The available hooks are shown in Listing~\ref{lst:exthooks}.
The implementation of the ``default protocol'' (Delay Request--Response Default PTP
profile) calls the defined hooks at appropriate
places, so the custom profile can override (or not) the default behavior
while avoiding massive code duplication. A profile may want to
define a customized state machine
table if it requires specific steps to
establish communication with a peer. For example, WR requires a sequence
of specific actions to establish a WR link whenever two WRPTP daemons
complete a successful Announce handshake.
\begin{lstlisting}[float=t,caption={Data structure for PTP extension
hooks.},label=lst:exthooks]
struct pp_ext_hooks {
int (*init)();
int (*open)(ppi, run_time_options);
int (*close)();
int (*listening)(ppi, frame, len);
int (*master_msg)(ppi, frame, len, mtype);
int (*new_slave)(ppi, frame, len);
int (*handle_resp)(ppi);
void (*s1)(ppi, MsgHeader, MsgAnnounce);
int (*execute_slave)(ppi);
void (*handle_announce)(ppi);
int (*handle_followup)(ppi, tstamp,
                       correction);
int (*pack_announce)(ppi);
void (*unpack_announce)(buf, MsgAnnounce);
};
\end{lstlisting} %ML: if this is left (I would say that it's rather not very useful)<
% the names of the functions need to be bold to make them easily visible
% AR: I pass on the bold...
The current set of hooks covers three different roles:
\begin{enumerate}
\item managing the extension: \texttt{\small init()}, \texttt{\small
open()} and \texttt{\small close()}. These are concerned with
initialization and clean up of any additional data structures or
run time options.
\item extending the PTP protocol: from \texttt{\small listening()} to
\texttt{\small handle\_followup()}. These hooks can change the
protocol behavior in specific places to manage specific TLV tuples
or other needs, especially when transitioning from one state to
another.
\item handling packets for profile--specific handshake:
\texttt{\small pack\_announce()}
and \texttt{\small unpack\_announce()}. These hooks provide and
identify profile--specific TLV tuples to either activate
profile--specific communication with a new peer or fall back to
default PTP synchronization.
\end{enumerate}
The hooks shown are designed to match the needs of White Rabbit, but
if different profiles are added to the code base and new hooks are
needed, the structure can be extended. This approach is a practical solution to
limit code duplication and still keep the profile detached from the core PTP
engine of the default protocol.
\section{Supported Architectures}
\label{supportedarch}
PPSi's main use case is WR networks and Linux host computers. The
arch--independent design, though, led us to support a number of
additional
architectures that help code development and testing.
The current code base includes support for the following architectures:
\begin{itemize}
\item \texttt{\small arch-unix}: the default architecture chosen for an unconfigured
build. It represents a UNIX hosted environment, which we usually
run on a Linux system with GNU standard libraries. It runs either as
an Ordinary Clock or a Boundary Clock.
\item \texttt{\small arch-wrs}: the ``White Rabbit Switch'' % \cite{wrsw}
build.
The WR switch includes special hardware for frame timestamping and
phase detection, so it needs its own set of network operations.
Also, the main loop of the process handles inter--process communication (IPC) with
other processes that run on the WR switch. Modeling the switch as
a separate architecture allows sharing protocol code with the
other WR architecture implementation (i.e. \texttt{\small arch-wrpc}, below)
while exploiting the special WR switch
software and hardware, and still being able to fall back on default PTP
and \texttt{\small arch-unix} operations when running in a non--WR network.
The switch is an 18--port Boundary Clock.
%ML: the above item, wrs, undermines all the advantages of PPSi, [...]
%AR: hope I fixed this
%ML:the below: the main point to mention is that it runs on bare-bone embedded CPU
% that is running in FPGA ! (thus, very simple architecture with very limited
% resources)
\item \texttt{\small arch-wrpc}: the ``White Rabbit PTP Core'' \cite{wrpc}
architecture. This is
the port of PPSi that runs as an Ordinary Clock
in White Rabbit I/O peripherals, e.g. \cite{spec}, \cite{svec}. The
run time environment of this build is a soft--core CPU running within
an FPGA.
\item \texttt{\small arch-bare-i386} and
\texttt{\small arch-bare-x86\_64}: these two
architectures build Linux processes but do not rely on standard
libraries: both process startup and the few required system calls
are implemented in assembly language by the ports themselves. In
practice, these ports allow running freestanding versions of PPSi
without the need to actually test on a microcontroller: if
programmers can build and run either of the \textit{bare} ports
for their host, surely no new unexpected dependency on host features
was introduced.
\item \texttt{\small arch-sim}: this is a simulator, recently added to the project
for testing purposes. When built for \texttt{\small arch-sim}, PPSi
implements two PTP instances that communicate: one of them is a master
and the other is a slave. By means of special time and network operations,
the two instances exchange frames via a software--only channel and
their perceived time flows at a much faster pace.
This ``architecture'' runs the
default protocol engine, and helped us tune the PPSi non--WR servo.
It is explained in section~\ref{simulator}.
\end{itemize}
We plan to port PPSi to bare--metal ARM7 and Cortex--M; an ARM7 prototype was
demonstrated in 2013 \cite{better}. A user has already ported it to
STM32 \cite{gladstone}.
\section{An Example: the PPSi Simulator}
\label{simulator}
We now see an example of how the modularity of PPSi was used to implement a
simulator (mentioned in section~\ref{supportedarch}) and tune the performance of a
non--WR servo controller.
% The implementation of the servo controller was not optimal for PPSi on non--WR
% links (default PTP) as its performance was not a primary concern in the WR project.
% Testing and
% performance tuning were hampered by the unpredictability of network parameters
% and by the fact that the results of the tests were not deterministically
% reproducible. To solve these problems simulation seemed a natural approach.
\begin{lstlisting}[float=b,caption={Data structure for clock
simulation.},label=lst:timesim]
struct pp_sim_time_instance {
int64_t current_ns; // ~300 years
int64_t freq_ppm_real; // simulated hw err
int64_t freq_ppm_servo; // correction drift
};
\end{lstlisting}
We designed the simulator as a new
% In order to ensure that the simulation tests directly the servo--code (to be used
% on the devices) in a time--efficient manner, we took advantage of the abstractions
% in PPSi architecture. We designed the simulator as a new
architecture (\texttt{\small arch-sim}) running as a Unix process
and featuring its own network operations
and time operations. The main program allocates two PPSi
state machines (i.e. two \textit{ppi} instances of \texttt{\small pp\_instance})
that simulate two PTP peers.
% Each simulated peer has its own
% notion of time, not related with
% the system clock of the physical host machine.
At run time, the peers act as PTP master and slave that communicate
by exchanging PTP frames within the simulator itself.
The PPSi core runs unmodified: only the time and frame exchange are simulated;
this ensures that the convergence patterns shown by the simulator reflect
how PPSi behaves in real networks.
\begin{figure}[t]
\centering
\usetikzlibrary{shapes,arrows,matrix,fit}
\begin{tikzpicture}[>=latex']
\tikzstyle{cloud} = [draw=gray, thick, ellipse,fill=gray!20, minimum height=5em,
text width=4em, text badly centered]
\tikzstyle{decision} = [diamond, draw=blue, thick, fill=blue!20,
text width=4em, text badly centered]
\tikzstyle{block} = [rectangle, rounded corners, text badly centered, thick,
text width=6em]
\tikzstyle{branch} = [circle, inner sep=0pt, minimum size=1mm, fill=black,
draw=black]
\matrix [ampersand replacement=\&, column sep=5mm, row sep=4mm]
{
\node {}; \&
\node [block, draw=yellow, fill=yellow!30] (begin)
{\scriptsize initialization}; \&
\node {}; \\
\node {}; \&
\node [decision] (frame) {\scriptsize is next event a frame?}; \&
\node [cloud] (timeout) {\scriptsize next event is a timeout}; \\
\node [block, draw=green, fill=green!30] (update)
{\scriptsize update timeouts and pending frames}; \&
\node [block, draw=red, fill=red!30] (ff1)
{\scriptsize fast forward till frame reception}; \&
\node [block, draw=red, fill=red!30] (ff2)
{\scriptsize fast forward till timeout expiry}; \\
\node {}; \&
\node [block, draw=gray, fill=gray!20] (recv)
{\scriptsize receive frame}; \&
\node [block, draw=gray, fill=gray!20] (fsm)
{\scriptsize run all state machines}; \\
\node (1){}; \&
\node (2){}; \&
\node (3){}; \\
};
\draw [dashed,->] (begin) -- (frame);
\draw [->] (frame) -- node [above] {N} (timeout);
\draw [->] (frame) -- node [right] {Y} (ff1);
\draw [->] (timeout) -- (ff2);
\draw [->] (ff1) -- (recv);
\draw [->] (ff2) -- (fsm);
\draw [->] (recv) -| (update);
\draw (fsm) |- (2) -| (update);
\draw (1) -- (3);
\draw [->] (update) |- (frame);
\end{tikzpicture}
\caption{Overall functioning of the simulator.}
\label{fig:mainloop}
\vspace{-0.3cm}
\end{figure}
The user can specify latency and jitter of the simulated link at
run time in the configuration file; adding fault--injection is the next
planned step.
% The simulator keeps track of PTP messages being sent in an internal
% array, and delivers them to the proper peer at the appropriate time
% according to current network parameters. Delivery happens by
% calling the state machine with the frame as an argument.
PPSi calls the PTP state machine either when it receives a frame or when its
timeout expires. The simulator takes advantage of this knowledge of timeouts
by fast--forwarding the time of both peers whenever no frame is
being delivered and both peers are waiting for a timeout. Thus, the
simulator, on current PC hardware, can run one thousand complete
PTP exchanges in one second of CPU time.
%ML: this reminds me that in PPSi everything is timeout-triggered, right ? so
% it is defined when the next call for a given function should be made...
% or something like this -> I think this is not soo standard and could be
% nicely described. then, it will aid in explaining how the fast forward
% in simulation was accomplished. not sure you could do the simulation so easily
% in PTPd
Figure \ref{fig:mainloop} represents the simulator's loop, which is
repeated for a user--defined number of iterations. If PPSi
is run with proper diagnostic levels, developers can use the
log files to gather useful information.
% The simulator
% represents a clock instance using the data structure presented in
% Listing~\ref{lst:timesim}. This design allows the time of each peer
% to flow at a different hardware rate and accept any corrections
% servo code requests in the slave clock.
% Each fast--forward event respects the current clock rate of each peer.
% The main loop of the simulator is controlled by the algorithm in
% Figure~\ref{fig:mainloop} and performed in the following steps:
% % performs fast--forwarding and calls the
% % PTP state machines:
% \begin{enumerate}
% \item it updates the \texttt{\small struct pp\_sim\_time\_instance} of the two peers and
% changes all pending timeouts accordingly;
% \item it calls the two state machines passing a frame, if received;
% \item it fast--forwarding the time (and goes back to point 1).
% \end{enumerate}
% The loop is repeated forever or for a user-defined number of iterations
% providing developers with useful information through diagnostic tools
% available in PPSi.
\begin{figure}[!t]
\centering
% \includegraphics[scale=0.8]{before.eps}
\includegraphics[width=0.48\textwidth]{PPSi/before3.jpg}
\caption{The simulated synchronization performance, master--to--slave offset, before
performance tuning.}
\label{fig:before}
% \end{figure}
% % \vspace{-1cm}
% \begin{figure}[!t]
% \centering
% \includegraphics[scale=0.8]{after.eps}
\includegraphics[width=0.48\textwidth]{PPSi/after3.jpg}
\caption{The simulated synchronization performance, master--to--slave offset, after
performance tuning.}
\label{fig:after}
% \vspace{-0.3cm}
\end{figure}
The simulator architecture enabled us to stress--test PPSi and isolate
errors in the non--WR servo controller implementation, so we could
improve its performance. The results of a simulated synchronization between two
PTP nodes with an initial frequency offset of 10~ppm are presented in Figure~\ref{fig:before}
and Figure~\ref{fig:after}. The simulation was configured to have an average
propagation delay of 1~ms and an initial master--to--slave offset of 10~ms, and
to introduce a random transmission jitter in the range [$-100$~ns, $100$~ns].
Figure~\ref{fig:before} shows simulation performance before servo--tuning:
poor convergence and constant steady--state offset
% and up to 70ns jumps
can be observed.
Different simulation runs showed a dependency of the time offset on
the initial frequency offset between master and slave.
% TODO:
% "and unjustified jitter, dependent on the initial frequency offset, can
% be observed." No, it cannot be observed. If there is jitter in steady
% state, it is not visible in the figure due to the scale, so it should
% just be described in the text: "there is a jitter of xx ns", The
% dependency of jitter on the initial frequency offset is also something
% with, contrarily to what the text says, cannot be seen in the figure.
% You could just say something like "different simulation runs also showed
% a dependency of steady-state jitter with respect to the initial
% frequency offset". The figures are OK. I would only change the text.
After using the simulator to identify and fix the main reasons for such performance
and further tuning the controller, the results presented in Figure~\ref{fig:after}
were obtained. The synchronization accuracy improved significantly;
% , regardless of frequency offset
now the main contributor to the performance is the jitter which stays within
tens--of--nanoseconds. %, regardless of frequency offset.% between master and slave.
% \vspace{-0.5cm}
\begin{figure}[]
\centering
% \includegraphics[scale=0.8]{initial.eps}
\includegraphics[width=0.44\textwidth]{PPSi/initC.jpg}
% \caption{Actual clock offset using software time stamps.}
\caption{Measured synchronization performance using software time stamps.}
\label{fig:initial}
% \end{figure}
% % \vspace{-1cm}
% \begin{figure}[]
% \centering
% \includegraphics[scale=0.8]{recover2.eps}
\includegraphics[width=0.44\textwidth]{PPSi/recover2.jpg}
\caption{Recovering from a time jump: raw data of offset values calculated by PPSi from software time stamps.}
\label{fig:recover}
\vspace{-0.3cm}
\end{figure}
\section{Field--test results of the simulation--based improvements}
\label{field-tests}
Tests in the field confirmed that the slave
converged much better with these code changes, matching the data
we collected in the simulations and proving the usefulness of the simulator.
As an example, Figure~\ref{fig:initial} shows the time offset of a
slave PPSi instance running on a general purpose computer from a White
Rabbit master; the computer uses software time stamps and started with an
initial offset of 1~ms. The PPSi instance on the WR Switch runs the
default PTP profile because it identified its own peer as not
WR--capable. Despite running on a conventional office network,
including a general purpose Ethernet switch in the PTP path, the
current PPSi servo, with the improvements we developed using the
simulator, can keep the slave well within 1 microsecond from the
master, symmetrically around zero.
Figure~\ref{fig:recover} shows how PPSi recovers from a jump in slave
time by 500 microseconds.
% Javier: ``The jump in fig. 6 looks like 50 us to me, not 500.''
% But no, it is 500, only it goes outside of the figure, the line enters
% later. See ``recover-data'' as amended in this commit.
The Y axis represents the offset from
master actually measured by PPSi in each PTP iteration, without any filtering:
the figure shows all the jitter you may expect with software time
stamps. During recovery we see an ``outlier'' event at iteration 290:
the measured offset from master is quite different from the nearby
ones.
PPSi treats as outliers all measurements that are too different, in
magnitude, from the current averaged value, but relaxes its threshold
as more outliers are detected, to eventually react if a real change in
network latency occurred. This outlier value is not discarded because
its magnitude is not much different from the current running average,
but no such events occur after the slave is synchronized to the master.
\section{Conclusions}
\label{conclusions}
This paper describes the implementation of the PPSi PTP daemon. We
designed PPSi as a Free Software package that supports the White
Rabbit PTP Profile, while keeping it interoperable with the default
profile. Our need to support White Rabbit in both Linux environments
and standalone FPGA nodes led us to abstract interactions between the
actual protocol and the time/network software primitives. As a
result, PPSi is currently the only PTP implementation whose
portability spans both hosted and freestanding environments.
A thorough review of PPSi by an external expert in early 2014 provided
us with clear guidelines and enabled us to set the path for further developments
and improvements to the current code base, mainly in the area of management.
% In early 2014 we received a thorough review by an experienced
% colleague, and we are going to follow his guidelines to improve the
% code base; the main area where PPSi needs work is management.
Overall, we think PPSi is a promising PTP implementation, able to support
a wide range of devices and platforms, without penalizing performance and
leaving the path open to future PTP profiles and new versions of the standard.
\bibliographystyle{IEEEtran}
\bibliography{IEEEabrv,./ppsi}
\end{document}
% LocalWords: PPSi picoseconds SyncE CERN WRPTP Kconfig struct TimeInternal
% LocalWords: tstamp recv send init
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment