diff --git a/mybib.bib b/mybib.bib index 42a1730..056b6f3 100644 --- a/mybib.bib +++ b/mybib.bib @@ -1006,6 +1006,12 @@ ISSN={1530-2059},} YEAR = "1988" } +@BOOK{rdh, + AUTHOR = "F~Langford-Smith", + TITLE = "Radio Designer's Handbook: Fourth Edition", + PUBLISHER = "ILIFFE", + YEAR = "1953" +} @BOOK{wdycwopt, AUTHOR = " Richard~P~Feynman", diff --git a/submission_thesis/CH1_introduction/copy.tex b/submission_thesis/CH1_introduction/copy.tex index 2407a0b..8b3e729 100644 --- a/submission_thesis/CH1_introduction/copy.tex +++ b/submission_thesis/CH1_introduction/copy.tex @@ -44,7 +44,7 @@ are based on statistical thresholds for the frequency of dangerous failures. We could state, for instance, that we can tolerate an `acceptable' maximum number of dangerous failures per billion hours of operation. % -We can then broadly categorise ratings of failure rates into Safety Integrity Levels (SIL)~\cite{scsh}. +We can then broadly categorise orders of failure rates into Safety Integrity Levels (SIL)~\cite{scsh}. % So for a maximum of 10 potentially dangerous failures per billion hours of operation we assign a SIL level of 4, for 100 a SIL level of 3, and so on in powers of ten. @@ -61,8 +61,8 @@ such as a nuclear power-station or air-liner, with far greater consequences on dangerous failure may require a SIL rating of 4. % -What we are saying is that while we may tolerate a low incidence of failure on a band-saw, -we will only tolerate extremely low incidences of failure in nuclear plant. +That is, while a low incidence of failure may be tolerable on a band-saw, +only extremely low incidences of failure would be tolerable in a nuclear plant. SIL ratings provide another objective yardstick for the measurement of system safety. %governing failure conditions and determining risk levels associated with systems. @@ -90,7 +90,7 @@ and using contract programmed software, allows the modelling of integrated software/electrical systems. 
% This is followed by two chapters showing examples of the new modular FMEA analysis technique (Failure Mode Modular De-Composition FMMD) -firstly looking at common electronic circuits and then at electronic/software hybrid systems. +firstly looking at a variety of common electronic circuits and then at electronic/software hybrid systems. } \section{Motivation} @@ -134,8 +134,8 @@ Any of the components that could, in failing, create a dangerous state were alre documented and approved using failure mode effects analysis (FMEA). % This new requirement -effectively meant that all single and double component failures were -now required to be analysed. +effectively meant that single and double component failures were +now required to be analysed~\cite{en298}[9.1.5]. % This, from a state explosion problem alone, meant that it was going to be virtually impossible to perform. diff --git a/submission_thesis/CH5_Examples/copy.tex b/submission_thesis/CH5_Examples/copy.tex index 8915c10..b17562d 100644 --- a/submission_thesis/CH5_Examples/copy.tex +++ b/submission_thesis/CH5_Examples/copy.tex @@ -1,25 +1,6 @@ -%\clearpage %\pagenumbering{arabic} - -% -% %% NEED TWO MORE EXAMPLES --- 02JUN2012 -% -% * ENVIRONMENTAL CASE (perhaps temp on an opto-coupler -% -% * OPERATIONAL STATE (perhaps a self test on an ADC where it is set to output and driven high and low and read) - -% to do: 23SEP2012 % -% 90_degrees is an incorrect failure mode in bubba and must be purged -% -% summing junction in sigma delta is not a valid fg, prob have to include -% the op-amp.... -% -% very annoying to have to pull out the comparison complexity. -% makes the comparisons between approaches have less meaning. -% have to discuss this. - \label{sec:chap5} - +% This chapter demonstrates FMMD applied to a variety of typical electronic circuits including analogue and digital %and electronics/software @@ -68,21 +49,19 @@ by applying FMMD to a sigma delta ADC. %analogue and digital signals. 
\item Section~\ref{sec:Pt100} demonstrates FMMD being applied to a commonly used Pt100 safety critical temperature sensor circuit, this is analysed for single and then double failure modes. - - \end{itemize} - +% %~\ref{sec:chap4} %can be re-used. %, but with provisos. % %The first %(see section~\ref{sec:diffamp}) - % % - % - +% +% +% % % Moving Pt100 to metrics % @@ -91,7 +70,7 @@ safety critical temperature sensor circuit, this is analysed for single and then %and the analysis of double simultaneous failure modes. % % Now in CHAPTER 6: Finally section~\ref{sec:elecsw} demonstrates FMMD analysis of a combined electronic and software system. - +% % \section{Basic Concepts Of FMMD} % % The %idea @@ -149,16 +128,14 @@ safety critical temperature sensor circuit, this is analysed for single and then % % \item {\dc} - a new component derived from an analysed {\fg} % % \end{itemize} % - - - +% %%%% XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX % % This section might fit in with the literature review.... Chris thinks its not relevant here % and I agree 20OCT2012 % %%%% XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX - +% % % \section{ FMMD overview} % % % % In the next sections we apply FMMD to electronic circuits, analogue/digital and electronic/software hybrids. @@ -244,12 +221,12 @@ safety critical temperature sensor circuit, this is analysed for single and then % % % % % % - +% \clearpage \section{Example Analysis: Inverting OPAMP} - +% \label{sec:invamp} - +% \begin{figure}[h] \centering \includegraphics[width=200pt]{CH5_Examples/invamp.png} @@ -257,16 +234,16 @@ safety critical temperature sensor circuit, this is analysed for single and then \caption{Inverting Amplifier Configuration} \label{fig:invamp} \end{figure} - +% %This configuration is interesting from methodology pers. There are two obvious ways in which we can model this circuit. 
One is to do this in two stages, by considering the gain resistors to be a potential divider and then combining it with the OPAMP failure mode model. The second is to place all three components in one {\fg}. Both approaches are followed in the next two sub-sections. - +% \subsection{First Approach: Inverting OPAMP using a Potential Divider {\dc}} - +% Ideally we would like to re-use {\dcs} from the $PD$ from section~\ref{subsec:potdiv}, which on initial inspection, %at first glance, looks a good candidate for this. % @@ -290,7 +267,7 @@ and analyse it as such; see table~\ref{tbl:pdneg}. We assume a valid range for the output value of this circuit. Thus negative or low voltages can be considered as LOW and voltages higher than this range considered as HIGH. - +% \begin{table}[h+] \caption{Inverted Potential divider: Single failure analysis} \begin{tabular}{|| l | l | c | c | l ||} \hline @@ -304,7 +281,7 @@ and voltages higher than this range considered as HIGH. \end{tabular} \label{tbl:pdneg} \end{table} - +% \begin{figure}[h] \centering \begin{tikzpicture}[shorten >=1pt,->,draw=black!50, node distance=\layersep] @@ -342,18 +319,18 @@ and voltages higher than this range considered as HIGH. \path (R1SHORT) edge (PDHIGH); \end{tikzpicture} - +% \caption{Failure symptoms of the `Inverted Potential Divider' $INVPD$} \label{fig:pdneg} \end{figure} - - +% +% We can form a {\dc} from the analysis results in table~\ref{tbl:pdneg} %this, and call it an inverted potential divider $INVPD$. - +% We can now progress to the final stage of analysis for this amplifier, by forming a {\fg} with the OpAmp and our new {\dc} $INVPD$. - +% \begin{table}[h+] \caption{Inverting Amplifier: Single failure analysis using the $PD$ {\dc}} \begin{tabular}{|| l | l | c | c | l ||} \hline @@ -376,11 +353,11 @@ by forming a {\fg} with the OpAmp and our new {\dc} $INVPD$. 
\end{tabular} \label{tbl:invamppd} \end{table} - - +% +% %%This gives the same results as the analysis from figure~\ref{fig:invampanalysis}. - - +% +% \begin{figure}[h+] \centering \begin{tikzpicture}[shorten >=1pt,->,draw=black!50, node distance=\layersep] @@ -475,8 +452,8 @@ by forming a {\fg} with the OpAmp and our new {\dc} $INVPD$. \caption{Full DAG representing failure modes and symptoms of the Inverting Op-amp Circuit} \label{fig:invdag1} \end{figure} - - +% +% %The differences are the root causes or component failure modes that %lead to the symptoms (i.e. the symptoms are the same but causation tree will be different). We can now express the failure modes for the {\dc} $INVAMP$ thus; @@ -486,9 +463,9 @@ We can draw a DAG representing the failure mode behaviour of this amplifier (see figure~\ref{fig:invdag1}). Note that this allows us to traverse from system level, or top failure modes to base component failure modes. %%%%% 12DEC 2012 UP to here in notes from AF email. - +% \clearpage - +% \subsection{Second Approach: Inverting OpAmp analysing with three components in one larger {\fg}} \label{subsec:invamp2} Here we analyse the same problem without using an intermediate $PD$ @@ -504,10 +481,10 @@ This concern is re-visited in the differencing amplifier example in the next sec %to symptoms) we cannot have a component failure mode that maps to two different symptoms (within a functional group). %Note that here we have a more general symptom $ OUT OF RANGE $ which could mean either %$HIGH$ or $LOW$ output. - +% % 08feb2012 bugger considering -ve input. It complicates things. % maybe do an ac amplifier later at some stage. - +% \begin{table}[h+] \caption{Inverting Amplifier: Single failure analysis: 3 components} \begin{tabular}{|| l | l | c | c | l ||} \hline @@ -1919,6 +1896,7 @@ and is a well known safety critical circuit. Applying FMMD lets us look at this circuit in a fresh light. 
We analyse this for both single and double failures, in addition it demonstrates FMMD coping with component parameter tolerances. +% The circuit is described traditionally and then analysed using the FMMD methodology. @@ -1966,11 +1944,14 @@ industrial applications below 600\oc, due to high accuracy\cite{aoe}. \label{Pt100range} The Pt100 four wire circuit uses two wires to supply a small electrical current, and returns two sense voltages by the other two. +% By measuring voltages from sections of this circuit forming potential dividers, we can determine the -resistance of the platinum wire sensor. The resistance +resistance of the platinum wire sensor. +% +The resistance of this is directly related to temperature, and may be determined by -look-up tables or a suitable polynomial expression. +look-up tables~\cite{eurothermtables} or a suitable polynomial expression. % % \begin{figure}[h] @@ -2055,8 +2036,8 @@ Where this occurs a circuit re-design is probably the only sensible course of ac \fmodegloss \paragraph{Single Fault FMEA Analysis of $Pt100$ Four wire circuit.} - -\label{fmea} +\label{sec:singlePt100FMEA} +%\label{fmea} The Pt100 circuit consists of three resistors, two `current~supply' wires and two `sensor' wires. Resistors, are considered to fail by either going OPEN or SHORT (see section~\ref{sec:res_fms}). %circuit\footnote{EN298:2003~\cite{en298} also requires that components are downrated, @@ -2144,25 +2125,25 @@ tables \cite{eurothermtables}, this corresponded to the resistances \ohms{100} and \ohms{212.02} respectively. From this the potential divider circuit can be analysed and the maximum and minimum acceptable voltages determined. These can be used as bounds results to apply the findings from the -Pt100 FMEA analysis in section \ref{fmea}. - +Pt100 FMEA analysis in section~\ref{sec:Pt100floating}. %\ref{fmea}. 
+% As the Pt100 forms a potential divider with the \ohms{2k2} load resistors, the upper and lower readings can be calculated thus: - - +% +% $$ highreading = 5V.\frac{2k2+Pt100}{2k2+2k2+pt100} $$ $$ lowreading = 5V.\frac{2k2}{2k2+2k2+Pt100} $$ So by defining an acceptable measurement/temperature range, and ensuring the values are always within these bounds, we can be confident that none of the resistors in this circuit has failed. - +% To convert these to twelve bit ADC (\adctw) counts: - +% $$ highreading = 2^{12}.\frac{2k2+Pt100}{2k2+2k2+pt100} $$ $$ lowreading = 2^{12}.\frac{2k2}{2k2+2k2+Pt100} $$ - - +% +% \begin{table}[ht] \caption{Pt100 Maximum and Minimum Values} % title of Table \centering % used for centering table @@ -2181,12 +2162,12 @@ $$ lowreading = 2^{12}.\frac{2k2}{2k2+2k2+Pt100} $$ \end{tabular} \label{ptbounds} \end{table} - +% Table \ref{ptbounds} gives ranges that determine correct operation. In fact it can be shown that for any single error (short or opening of any resistor) this bounds check will detect it. - - +% +% % WAS a repeated paragraph % \paragraph{Consideration of Resistor Tolerance.} % % @@ -2219,14 +2200,14 @@ will detect it. % will be determined by the accuracy of $R_2$ and $R_{3}$. It is reasonable to % take the mean square error of these accuracy figures~\cite{probstat}. % - +% \paragraph{Single Fault FMEA Analysis of $Pt100$ Four wire circuit} - - +% +% \ifthenelse{\boolean{pld}} { \paragraph{Single Fault Modes as PLD} - +% The component~failure~modes in table \ref{ptfmea} can be represented as contours on a PLD diagram. Each test case, is defined by the contours that enclose @@ -2241,23 +2222,23 @@ and are thus enclosed by one contour each. \label{fig:Pt100_tc} \end{figure} } % \ifthenelse {\boolean{pld}} - +% %ating input Fault This circuit supplies two results, the {\em sense+} and {\em sense-} voltage readings. 
To establish the valid voltage ranges for these, and knowing our valid temperature range for this example ({0\oc} .. {300\oc}) we can calculate valid voltage reading ranges by using the standard voltage divider equation \ref{eqn:vd} for the circuit shown in figure \ref{fig:vd}. - - - - +% +% +% +% \paragraph{Proof of Out of Range Values for Failures} \label{pt110range} Using the temperature ranges defined above we can compare the voltages we would get from the resistor failures to prove that they are -`out of range'. There are six test cases and each will be examined in turn. - +`out~of~range'. There are six test cases and each will be examined in turn. +% \subparagraph{ TC 1 : Voltages $R_1$ SHORT } With Pt100 at 0\oc $$ highreading = 5V $$ @@ -2267,19 +2248,19 @@ $$ lowreading = 5V.\frac{2k2}{2k2+100\Omega} = 4.78V$$ With Pt100 at the high end of the temperature range 300\oc. $$ highreading = 5V $$ $$ lowreading = 5V.\frac{2k2}{2k2+212.02\Omega} = 4.56V$$ - +% Thus with $R_1$ shorted both readings are outside the prescribed range in table \ref{ptbounds}. - +% \paragraph{ TC 2 : Voltages $R_1$ OPEN } - +% In this case the 5V rail is disconnected. All voltages read are 0V, and therefore both readings are outside the prescribed range in table \ref{ptbounds}. - - +% +% \paragraph{ TC 3 : Voltages $R_2$ SHORT } - +% With Pt100 at 0\oc $$ lowreading = 0V $$ Since the lowreading or sense- is directly connected to the 0V rail, @@ -2290,35 +2271,35 @@ $$ highreading = 5V.\frac{212.02\Omega}{2k2+212.02\Omega} = 0.44V$$ % Thus with $R_2$ shorted both readings are outside the prescribed range in table \ref{ptbounds}. - +% \paragraph{ TC 4 : Voltages $R_2$ OPEN } Here there is no potential divider operating and both sense lines will read 5V, outside of the prescribed range. - - +% +% \paragraph{ TC 5 : Voltages $R_3$ SHORT } - +% Here the potential divider is simply between the two 2k2 load resistors. Thus it will read a nominal; 2.5V. 
- +% Assuming the load resistors are precision components, and then taking an absolute worst case of 1\% either way. - +% $$ 5V.\frac{2k2*0.99}{2k2*1.01+2k2*0.99} = 2.475V $$ - +% $$ 5V.\frac{2k2*1.01}{2k2*1.01+2k2*0.99} = 2.525V $$ - +% These readings both lie outside the prescribed range. Also the sense+ and sense- readings would have the same value. - +% \paragraph{ TC 6 : Voltages $R_3$ OPEN } - +% Here the potential divider is broken. The sense- will read 0V and the sense+ will read 5V. Both readings are outside the prescribed range. - +% \subsection{Summary of Analysis} - +% All six test cases have been analysed and the results agree with the FMEA presented in table~\ref{ptfmea}. %The PLD diagram, can now be used to collect the symptoms. @@ -2331,7 +2312,7 @@ In practical use, by defining an acceptable measurement/temperature range, and ensuring the values are always within these bounds, we can be confident that none of the resistors in this circuit has failed. - +% \ifthenelse{\boolean{pld}} { \begin{figure}[h] @@ -2342,8 +2323,8 @@ resistors in this circuit has failed. \label{fig:Pt100_tc_sp} \end{figure} } - - +% +% \subsection{Derived Component with one failure mode.} The Pt100 circuit can now be treated as a component in its own right, and has one failure mode, {\textbf{OUT\_OF\_RANGE}}. This is a single, detectable failure mode. The detectability of a @@ -2353,7 +2334,7 @@ has been developed for safety critical temperature measurement. \ifthenelse{\boolean{pld}} { It can now be represented as a PLD see figure \ref{fig:Pt100_singlef}. - +% \begin{figure}[h] \centering \includegraphics[width=100pt,bb=0 0 167 194,keepaspectratio=true]{./CH5_Examples/Pt100_singlef.png} @@ -2362,22 +2343,22 @@ It can now be represented as a PLD see figure \ref{fig:Pt100_singlef}. \label{fig:Pt100_singlef} \end{figure} } - +% %From the single faults (cardinality constrained powerset of 1) analysis, we can now create %a new derived component, the {\emPt100circuit}. 
This has only \{ OUT\_OF\_RANGE \} %as its single failure mode. - - +% +% %Interestingly we can calculate the failure statistics for this circuit now. %Mill 1991 gives resistor stats of ${10}^{11}$ times 6 (can we get special stats for Pt100) ??? %\clearpage - - - +% +% +% %\section{Double failure analysis} - +% %CITE PRICE MULTIPLE FAILURE PAPER. - +% %\clearpage \section{ Pt100 Double Simultaneous Fault Analysis} \label{sec:Pt100d} @@ -2398,7 +2379,7 @@ Table \ref{tab:ptfmea2} lists all the combinations of double faults as FMMD test cases. %and then hypothesises how the functional~group will react %under those conditions. - +% \begin{table}[ht] \caption{Pt100 FMEA Double Faults} % title of Table \centering % used for centering table @@ -2431,10 +2412,10 @@ TC 18: & $R_2$ SHORT $R_3$ SHORT & low & low & Both out of Rang \end{tabular} \label{tab:ptfmea2} \end{table} - - +% +% %\paragraph{Proof of Double Faults Hypothesis} - +% \paragraph{ TC 7 : Voltages $R_1$ OPEN $R_2$ OPEN } \label{Pt100:bothfloating} This double fault mode produces an interesting symptom. @@ -2451,84 +2432,84 @@ fault. % Undetectable faults are generally to be avoided in a safety critical environment~\cite{ACS:ACS1297,721666}. %that must be handled. - - +% +% \paragraph{ TC 8 : Voltages $R_1$ OPEN $R_2$ SHORT } - +% This cuts the supply from Vcc. Both sense lines will be at zero. Thus both values will be out of range. - - +% +% \paragraph{ TC 9 : Voltages $R_1$ OPEN $R_3$ OPEN } - +% Sense- will be floating. Sense+ will be tied to Vcc and will thus be out of range. - +% \paragraph{ TC 10 : Voltages $R_1$ OPEN $R_3$ SHORT } - +% This shorts ground to both of the sense lines. Both values will be out of range. - +% \paragraph{ TC 11 : Voltages $R_1$ SHORT $R_2$ OPEN } - +% This shorts both sense lines to Vcc. Both values will be out of range. - - +% +% \paragraph{ TC 12 : Voltages $R_1$ SHORT $R_2$ SHORT } - +% This shorts the sense+ to Vcc and the sense- to ground. 
Both values will be out of range. - - +% +% \paragraph{ TC 13 : Voltages $R_1$ SHORT $R_3$ OPEN } - +% This shorts the sense+ to Vcc and the sense- to ground. Both values will be out of range. - +% \paragraph{ TC 14 : Voltages $R_1$ SHORT $R_3$ SHORT } - +% This shorts the sense+ and sense- to Vcc. Both values will be out of range. - +% \paragraph{ TC 15 : Voltages $R_2$ OPEN $R_3$ OPEN } - +% This shorts the sense+ to Vcc and causes sense- to float. The sense+ value will be out of range. - - +% +% \paragraph{ TC 16 : Voltages $R_2$ OPEN $R_3$ SHORT } - +% This shorts the sense+ and sense- to Vcc. Both values will be out of range. - - - - - +% +% +% +% +% \paragraph{ TC 17 : Voltages $R_2$ SHORT $R_3$ OPEN } - +% This shorts the sense- to ground. The sense- value will be out of range. - - +% +% \paragraph{ TC 18 : Voltages $R_2$ SHORT $R_3$ SHORT } - +% This shorts the sense+ and sense- to ground. Both values will be out of range. - +% %\clearpage - +% \ifthenelse{\boolean{pld}} { \subsection{Double Faults Represented on a PLD Diagram} - +% We can show the test cases on a diagram with the double faults residing on regions corresponding to overlapping contours see figure \ref{fig:plddouble}. Thus $TC\_18$ will be enclosed by the $R2\_SHORT$ contour and the $R3\_SHORT$ contour. - - +% +% \begin{figure}[h] \centering \includegraphics[width=450pt,bb=0 0 730 641,keepaspectratio=true]{./CH5_Examples/plddouble.png} @@ -2536,7 +2517,7 @@ Thus $TC\_18$ will be enclosed by the $R2\_SHORT$ contour and the $R3\_SHORT$ co \caption{Pt100 Double Simultaneous Faults} \label{fig:plddouble} \end{figure} - +% We use equation \ref{eqn:correctedccps2} to verify complete coverage for a given cardinality constraint is not visually obvious. % @@ -2546,22 +2527,22 @@ not that all for a given cardinality constraint have been included. } { } - +% \paragraph{Symptom Extraction} - +% We can now examine the results of the test case analysis and apply symptom abstraction. 
In all the test case results we have at least one out of range value, except for $TC\_7$ which has two unknown values/floating readings. We can collect all the faults, except $TC\_7$, into the symptom $OUT\_OF\_RANGE$. As a symptom $TC\_7$ could be described as $FLOATING$. - +% \ifthenelse{\boolean{pld}} { We can thus draw a PLD diagram representing the failure modes of this functional~group, the Pt100 circuit from the perspective of double simultaneous failures, in figure \ref{fig:Pt100_doublef}. - +% \begin{figure}[h] \centering \includegraphics[width=450pt,bb=0 0 730 641,keepaspectratio=true]{./CH5_Examples/plddoublesymptom.png} @@ -2572,12 +2553,13 @@ in figure \ref{fig:Pt100_doublef}. } %% \ifthenelse {\boolean{pld}} { } - +% %\clearpage \subsection{Derived Component : The Pt100 Circuit} +\label{sec:Pt100floating} The Pt100 circuit again, can now be treated as a component in its own right, and has two failure modes, {\textbf{OUT\_OF\_RANGE}} and {\textbf{FLOATING}}. - +% \ifthenelse{\boolean{pld}} { It can now be represented as a PLD see figure \ref{fig:Pt100_doublef}. @@ -2591,9 +2573,9 @@ It can now be represented as a PLD see figure \ref{fig:Pt100_doublef}. } % \ifthenelse {\boolean{pld}} { } - - - +% +% +% % The resistors R1, R2 form a summing junction % to the negative input of IC1. % Using the earlier definition for resistor failure modes, @@ -2621,18 +2603,17 @@ It can now be represented as a PLD see figure \ref{fig:Pt100_doublef}. % % This summing junction fails with two symptoms. We create a {\dc} called $SUMJUNCT$ and we can state, % $$fm(SUMJUNCT) = \{ R1\_IN\_DOM, R2\_IN\_DOM \} $$. - - +% +% %The D type flip flop - +% %\subsection{FMMD Process applied to $\Sigma \Delta $ADC}. 
- +% %T%he block diagram in figure~\ref{fig - - - - - +% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/submission_thesis/CH6_Software_Examples/software.tex b/submission_thesis/CH6_Software_Examples/software.tex index 2ea4fe8..126a577 100644 --- a/submission_thesis/CH6_Software_Examples/software.tex +++ b/submission_thesis/CH6_Software_Examples/software.tex @@ -12,7 +12,7 @@ With modular FMEA i.e. FMMD %(FMMD) we have the concepts of failure~modes of components, {\fgs} and symptoms of failure. % for a functional group. % -A programmatic function has similarities with these concepts. %a {\fg} as defined by the FMMD process. +A programmatic function has similar attributes to an FMMD {\fg}. % with these concepts. %a {\fg} as defined by the FMMD process. % An FMMD {\fg} is placed into a hierarchy, likewise a software function is typically placed into the hierarchy of its call-tree. @@ -21,10 +21,10 @@ which could be viewed as its `components': it has outputs, i.e. it can perform actions on data or hardware. %which will be used by other functions that may call it. % -We show that we can map a software function to a {\fg} in FMMD: its failure modes +It is shown below that a software function can be mapped to an FMMD {\fg}: its failure modes are the failure modes of the software components %(other functions it calls %) -and the hardware from which it reads values. +and/or the hardware from which it reads values. Its outputs are the data it changes, or the hardware actions it performs. %% %% Talk about how software specification will often say how hardware @@ -32,13 +32,16 @@ Its outputs are the data it changes, or the hardware actions it performs. 
%% always cover the failure modes of the hardware being interfaced too. % When we have analysed a -software function---using failure conditions of its inputs as failure modes---we can -determine its symptoms of failure (i.e. how calling functions will see its failure mode behaviour). +software function---using failure conditions of its inputs as a source of failure modes---we can +determine its symptoms of failure (i.e. how functions that call it will see its failure mode behaviour). % -We apply the FMMD process to software functions by viewing them in terms of their failure mode behaviour. +FMMD is applied to software functions by viewing them in terms of their failure mode behaviour. % -As software already fits into a hierarchy we have one less analysis decision to make, compared +That is to say, using FMMD, software functions are treated like {\fgs} of electronic components. +% +% +As software already fits into a hierarchy we have one less analysis decision to make, when compared to analysing electronics. % For electrical and mechanical systems, although we may be guided by the original designers @@ -78,13 +81,18 @@ we form a complete failure mode hierarchy of the system under investigation. Software written for safety critical systems is usually constrained to be modular~\cite{en61508}[3] and non recursive~\cite{misra}[15.2]. %{iec61511}. -Because of this we can assume direct call trees~\footnote{A typical embedded system -will have a run time call tree, and (possibly multiple) interrupt sourced call trees.}. Functions call functions +% +Because of this we can assume direct call trees\footnote{A typical embedded system +will have a run time call tree, and (possibly multiple) interrupt sourced call trees.}. +% +Functions call functions from the top down and eventually call the lowest level library or IO functions that interact with hardware.%/electronics. 
-What is potentially difficult with a software function, is deciding what -its failure modes and symptoms are. +What is potentially difficult with applying FMMD to a software function, is deciding how to map +its component failure modes %(in electronics the failure modes of its components)---and +and its symptoms of failure in a manner compatible with the FMMD process. %(the failure modes of a function taken as a {\dc}) are. +% With electronic components, we can use literature to point us to suitable sets of {\fms}~\cite{fmd91}~\cite{mil1991}~\cite{en298}. %~\cite{en61508}~\cite{en298}. With software only some library functions are well known and rigorously documented @@ -139,16 +147,17 @@ equivalent to failure mode of `one of its components'. \paragraph{Mapping contract `post-condition' violations to symptoms.} \fmmdglossCONTRACTPROG -A post condition is a definition of correct behaviour of a function. % -A violated post condition is a symptom of failure, or derived failure mode, from a function. +A post-condition is a definition of correct behaviour of a function. +% +A violated post-condition is a symptom of failure, or, in FMMD terms a derived failure mode, for a function. % Post conditions could be either actions performed (i.e. the state of hardware changed) or an output value of a function. In pure contract programming, a violation of a pre-condition would cause the function to \textbf{not} be executed. % In implementation code, a pre-condition violation should cause -an error to be generated, and thus a post condition to fail. +an error to be generated, and thus a post-condition to fail. % A function can fail for reasons other than corruption of its input data (i.e. failure caused by variables it uses or return values from functions it calls). 
@@ -159,11 +168,13 @@ by another software function erroneously overwriting variables~\cite{swseatbelt} Current work on software FMEA generally focuses on mapping variable corruption to failure modes~\cite{procsfmea,procsfmeadb,sfmeaauto,sfmea}. However, errors other than variable corruption can occur. +% For instance a microprocessor may have subtle bugs in its instruction set, or incorrectly handled -interrupt contention which could cause side effects in software. +interrupt contention~\cite{concurrency_c_tool} which could cause side effects in software. +% For the failure mode model of any software function, -we must consider that all failure modes defined by post condition +we must consider that all failure modes defined by post-condition violations could simply occur. %`components'. @@ -176,7 +187,7 @@ Here they are taken to mean invariants applying to data or conditions that the function under analysis deals with or could be affected by. % Invariants in contract programming may apply to inputs to the function (where violations can be considered {\fms} in FMMD terminology), -and to outputs (where violations can be considered {\fms} in FMMD terminology). +and to outputs (where violations can be considered symptoms, or derived {\fms}, in FMMD terminology). \fmmdglossCONTRACTPROG \subsection{Combined Hardware/Software FMMD} @@ -187,12 +198,14 @@ that is nearly always used in conjunction with a programmatic element. A common method for delivering a quantitative value in analogue electronics is to supply a current signal to represent the value to be sent~\cite{aoe}[p.934]. % -Usually, $4mA$ represents a zero or starting value and $20mA$ represents the full scale, -and this is referred to as {\ft} signalling. 
+Commonly, $4mA$ represents a zero or starting value and $20mA$ represents the full scale, +and this is referred to as {\ft} signalling\footnote{Various current ranges have been used for +value sending via electrical current, {\tenfifty} being one other range used. However, {\ft} signalling +has emerged as the industry standard over the last few decades.}. % Using current instead of voltage to transmit an analogue value has intrinsic electrical safety advantages mainly due to -current being constant in a circuit (Kirchovs current law). +current being constant in a circuit (Kirchhoff's current law~\cite{rdh}[p.160]). % What is sent as current is what will arrive at the receiving end. @@ -218,7 +231,7 @@ arrive at the receiving end. \begin{figure}[h] \centering - \includegraphics[width=230pt]{./CH5_Examples/ftcontext.png} + \includegraphics[width=430pt]{./CH5_Examples/ftcontext.png} % ftcontext.png: 767x385 pixel, 72dpi, 27.06x13.58 cm, bb=0 0 767 385 \caption{Context Diagram for {\ft} loop} \label{fig:ftcontext} @@ -322,21 +335,18 @@ voltage for a given ADC channel. This function deals directly with the hardware in the micro-controller on which the software is running. %software on. % -The software's job is to select the correct channel (ADC multiplexer) and then to initiate a +The function \cf{read\_ADC}'s job is to select the correct channel (ADC multiplexer) and then to initiate a conversion by setting an ADC 'go' bit (see code sample in figure~\ref{fig:code_read_ADC}). % It takes the raw ADC reading and converts it into a floating point\footnote{the type, `double' or `double precision', is a standard C language floating point type~\cite{DBLP:books/ph/KernighanR88}.} voltage value. - - - - - -%{\vbox{ +% +% +% \begin{figure}[h+] - +% \footnotesize \begin{verbatim} /***********************************************/ @@ -351,6 +361,7 @@ voltage value. 
/***********************************************/ double read_ADC( int channel ) { int timeout = 0; + double dval = -3.0; /* require: a) input channel from ADC to be in valid ADC range b) voltage ref is 0.1% of 5V */ @@ -359,37 +370,29 @@ double read_ADC( int channel ) { /* if invalid channel selected */ if ( channnel > ADC_CHAN_RANGE ) return -2.0; - /* set the multiplexer to the desired channel */ ADCMUX = channel; - ADCGO = 1; /* initiate ADC conversion hardware */ - /* wait for ADC conversion with timeout */ - while ( ADCGO == 1 || timeout < 100 ) + while ( ADCGO == 1 && timeout < 120 ) timeout++; - if ( timeout < 100 ) + /* the following converts ADC12 counts to voltage */ dval = (double) ADCOUT * 5.0 / ADCRANGE; else dval = -1.0; /* indicate invalid reading */ - /* return voltage as a floating point value */ - /* ensure: value is voltage input to within 0.1% */ - return dval; } \end{verbatim} \caption{Software Function: \cf{read\_ADC}} \label{fig:code_read_ADC} \end{figure} -%} -%} \clearpage - +% We now have a very simple software structure, a call tree, shown in figure~\ref{fig:ct1}. - +% \begin{figure}[h] \centering \includegraphics[width=100pt]{./CH5_Examples/ct1.png} @@ -397,7 +400,7 @@ We now have a very simple software structure, a call tree, shown in figure~\ref{ \caption{Call tree for software example} \label{fig:ct1} \end{figure} - +% This software is above the ADC hardware in the conceptual call tree---from a programmatic perspective---%in software terms---the the software is reading values from the `lower~level' electronics. % @@ -410,19 +413,23 @@ We can identify the resistor and the ADC module of the micro-controller as the base components in this design. % We now apply FMMD starting with the hardware. 
- - +% +% \subsection{FMMD Process} - -\paragraph{Functional Group - Convert mA to Voltage - CMATV} - -This functional group contains the load resistor +% +\paragraph{Hardware only Functional Grouping - Convert mA to Voltage - CMATV} +% +This {\fg}, $G_1$, contains the load resistor and the physical Analogue to Digital Converter (ADC). -%Our functional group, $G_1$ is thus the set of base components: $G_1 = \{R, ADC\}$. +% +$G_1$ is thus the set of base components: $G_1 = \{R, ADC\}.$ +It is therefore a hardware only {\fg}. +% + %We now determine the {\fms} of all the components in $G_1$. -We now determine the {\fms} of all the components in the {\fg}. +We now determine the {\fms} of all the components in the {\fg} $G_1$. For the resistor we can use a failure mode set from the literature~\cite{en298}. -Where the function $fm$ returns a set of failure modes for a given component we can state: +Where the function $fm$ returns a set of failure modes for a given component: % we state: $$ fm(R) = \{OPEN,SHORT\}. $$ \vbox{ @@ -432,81 +439,64 @@ For the ADC we can determine the following failure modes: \item STUCKAT --- The ADC outputs a constant value, \item MUXFAIL --- The ADC cannot select its input channel correctly, \item LOW --- The ADC output is always LOW, or zero ADC counts, - \item HIGH --- The ADC output is always HIGH, or max ADC counts. + \item HIGH --- The ADC output is always HIGH, or maximum ADC counts. \end{itemize} } We can use the function $fm$ to define the {\fms} of an ADC thus: $$ fm(ADC) = \{ STUCKAT, MUXFAIL,LOW, HIGH \}. $$ - -With these failure modes, we can analyse our first functional group, see table~\ref{tbl:cmatv}. +% +With these failure modes defined, analysis can begin on the {\fg} $G_1$, see table~\ref{tbl:cmatv}. 
{ \tiny \begin{table}[h+] \center -\caption{$G_1$: Failure Mode Effects Analysis} % title of Table +\caption{{\fg} $G_1$: Failure Mode Effects Analysis} % title of Table \label{tbl:cmatv} -\begin{tabular}{|| l | c | l ||} \hline - %\textbf{Failure} & \textbf{failure} & \textbf{Symptom} \\ - %\textbf{Scenario} & \textbf{effect} & \textbf{ADC } \\ \hline - % & & & & \\ - - \textbf{Failure} & \textbf{Failure } & \textbf{Derived Component} \\ +\begin{tabular}{|| l | c | l ||} \hline \hline + \textbf{Failure} & \textbf{Failure } & \textbf{Derived Component} \\ \textbf{cause} & \textbf{Effect} & \textbf{Failure Mode} \\ - - \hline \hline - 1: $R_{OPEN}$ & resistor open, & $HIGH$ \\ - & voltage on pin high & \\ \hline + 1: $R_{OPEN}$ & resistor open, & $HIGH$ \\ + & voltage on pin high & \\ \hline - 2: $R_{SHORT}$ & resistor shorted, & $LOW$ \\ - & voltage on pin low & \\ \hline \hline - - - - 3: $ADC_{STUCKAT}$ & ADC reads out & $V\_ERR$ \\ - & fixed value & \\ \hline - - - - 4: $ADC_{MUXFAIL}$ & ADC may read & $V\_ERR$ \\ - & wrong channel & \\ \hline - - 5: $ADC_{LOW}$ & output low & $LOW$ \\ - 6: $ADC_{HIGH}$ & output high & $HIGH$ \\ \hline + 2: $R_{SHORT}$ & resistor shorted, & $LOW$ \\ + & voltage on pin low & \\ \hline \hline + 3: $ADC_{STUCKAT}$ & ADC reads out & $V\_ERR$ \\ + & fixed value & \\ \hline + 4: $ADC_{MUXFAIL}$ & ADC may read & $V\_ERR$ \\ + & wrong channel & \\ \hline + 5: $ADC_{LOW}$ & output low & $LOW$ \\ \hline + 6: $ADC_{HIGH}$ & output high & $HIGH$ \\ \hline % % As Chris Garrett points out there is no software involved at this stage! - %7: post condition fails & software fails & $V\_ERR$ \\ + %7: post-condition fails & software fails & $V\_ERR$ \\ % & \hline - - \hline - - -\hline - \end{tabular} \end{table} } - - -We now collect the symptoms for the hardware functional group, $\{ HIGH , LOW, V\_ERR \} $, and -create a {\dc} to represent this called, $CMATV$. 
+% +% +Common failure symptoms are now collected for $G_1$, these being $\{ HIGH , LOW, V\_ERR \} $. +Using the common failure symptoms, +a {\dc} is created, $CMATV$ (an acronym for {\em Convert milli-amps to Voltage}). +% %We can express this using the `$\derivec$' function thus: %$$ CMATV = \; \derivec (G_1) .$$ - -As its failure modes are the symptoms of failure from the functional group we state: +% +As its failure modes are the collected symptoms of failure from the {\fg} $G_1$, +the failure modes for the new {\dc} are: %we state: $$fm ( CMATV ) = \{ HIGH , LOW, V\_ERR \} .$$ - - -\paragraph{Functional Group - Software - Read\_ADC - RADC} +% +% +\paragraph{Software and hardware hybrid {\fg} --- RADC} \label{readADC} -The software function $Read\_ADC$ uses the ADC hardware analysed +The software function \cf{Read\_ADC} uses the ADC hardware analysed as the {\dc} CMATV above. - - +% +% The code fragment in figure~\ref{fig:code_read_ADC} states pre-conditions, as {\em/* require: a) input channel from ADC to be in valid ADC range @@ -522,97 +512,89 @@ which we can call $ CHAN\_NO $. The reference voltage for the ADC has a 0.1\% accuracy requirement. % If the reference value is outside this, it is also a {\fm} -of this function, which we can call $V\_REF$ (this failure mode is detectable %observable +of this function, which we can call $V\_REF$ (nb: this failure mode is detectable %observable only if we specifically use a test input to measure the reference). - +% Taken as a component for use in FMEA/FMMD our function has two failure modes. We can therefore treat it as a generic component, $Read\_ADC$, by stating: - +% $$ fm(Read\_ADC) = \{ CHAN\_NO, VREF \} $$ - -As we have a failure mode model for our function, we use it in conjunction +% +With the failure mode model for our function, we use it in conjunction with the ADC hardware {\dc} CMATV, to form a {\fg} $G_2$, where $G_2 =\{ CMSTV, Read\_ADC \}$. % -We analyse this hardware/software combined {\fg}. 
- - - +This analysis is performed in table~\ref{tbl:radc}. %{ hardware/software combined {\fg}. +% +% { \tiny \begin{table}[h+] \center -\caption{$G_2$: Failure Mode Effects Analysis} % title of Table +\caption{{\fg} $G_2$: Failure Mode Effects Analysis} % title of Table \label{tbl:radc} - \begin{tabular}{|| l | c | l ||} \hline -% \textbf{Failure} & \textbf{failure} & \textbf{Symptom} \\ -% \textbf{Scenario} & \textbf{effect} & \textbf{RADC } \\ \hline - \textbf{Failure} & \textbf{Failure } & \textbf{Derived Component} \\ \textbf{cause} & \textbf{Effect} & \textbf{Failure Mode} \\ - - \hline 1: ${CHAN\_NO}$ & wrong voltage & $VV\_ERR$ \\ & read & \\ \hline - 2: ${VREF}$ & ADC volt-ref & $VV\_ERR$ \\ & incorrect & \\ \hline \hline - - - 3: $CMATV_{V\_ERR}$ & voltage value & $VV\_ERR$ \\ & incorrect & \\ \hline - - - 4: $CMATV_{HIGH}$ & ADC may read & $HIGH$ \\ & wrong channel & \\ \hline - - 5: $CMATV_{LOW}$ & output low & $LOW$ \\ \hline - - 6: post condition fails & software fails & $VV\_ERR$ \\ + 5: $CMATV_{LOW}$ & output low & $LOW$ \\ \hline + 6: post-condition fails & software fails & $VV\_ERR$ \\ & C function: Read\_ADC & \\ \hline - \hline - - \hline - \end{tabular} \end{table} } - - - -We now collect the symptoms of failure for the {\fg} analysed (see table~\ref{tbl:radc}) -as $\{ VV\_ERR, HIGH, LOW \}$. We can add as well the violation of the postcondition -for the function. +% +% +% +The common symptoms of failure from table~\ref{tbl:radc} are collected giving +$\{ VV\_ERR, HIGH, LOW \}$. Any violations of postconditions for software functions in the {\fg} $G_2$ must be added to this set. This postcondition, {\em /* ensure: value is voltage input to within 0.1\% */}, -corresponds to $VV\_ERR$, and is already in the {\fm} set for this {\fg}. - +corresponds to $VV\_ERR$, and happens to already be in the {\fm} set for this {\fg}. 
+% %We can now create a {\dc} called $RADC$ thus: $$RADC = \; \derivec(G_2)$$ which has the following %{\fms}: -We can now create a {\dc} called $RADC$ thus: +We can now create a {\dc} called $RADC$ with its failure modes thus: $$ fm(RADC) = \{ VV\_ERR, HIGH, LOW \} .$$ - - - - - -\paragraph{Functional Group - Software - voltage to per mil - VTPM } - -This function sits on top of the $RADC$ {\dc} determined above. -We look at the pre-conditions for the function \cf{read\_4\_20\_input}, % which we can call $RI$ -to determine its {\fms}. -Its pre-condition is, {\em /* require: input from ADC to be between 0.88 and 4.4 volts */}. -We can map this violation of the pre-condition, to the {\fm} VRNGE; %As this function has one pre-condition -we can state, % -$$ fm(read\_4\_20\_input) = \{ VRNGE \} .$$ +% +% +% +% +% +\paragraph{Functional Group - Software - voltage to per mil - VTPM } +% +The next function higher in the call tree is \cf{read\_4\_20\_input}: +This function calls the function \cf{Read\_ADC} which +is a member of the {\fg} from which we derived the {\dc} $RADC$. +% +%and therefore sits on top of the {\dc} $RADC$ in the FMMD hierarchy. +%determined above. +% +The pre-conditions for the function \cf{read\_4\_20\_input} are examined % which we can call $RI$ +to determine its {\fms}. +% +Its one pre-condition is, {\em /* require: input from ADC to be between 0.88 and 4.4 volts */}. 
+% +This violation of the pre-condition can become the {\fm} VRNGE (an acronym for Voltage Range); %As this function has one pre-condition +we state, +% +$ fm(read\_4\_20\_input) = \{ RI_{VRNGE} \} .$ +To this we add the post-condition, {\em ensure: value is proportional (0-999) to the {\ft} input}, +which can be termed $VAL\_ERR$: the failure modes for \cf{read\_4\_20\_input} are now defined as: +$$ fm(read\_4\_20\_input) = \{ RI_{VRNGE}, RI_{VAL\_ERR} \} .$$ +% \fmmdglossCONTRACTPROG -We can now form a functional group with the {\dc} $RADC$ and the +A {\fg}, $G_3$, is formed with the {\dc} $RADC$ and the software component \cf{read\_4\_20\_input}, i.e. $G_3 = \{read\_4\_20\_input, RADC\} $. % { @@ -621,69 +603,66 @@ software component \cf{read\_4\_20\_input}, i.e. $G_3 = \{read\_4\_20\_input, RA \center \caption{$G_3$: \cf{Read\_4\_20}: Failure Mode Effects Analysis} % title of Table \label{tbl:r420i} - +% \begin{tabular}{|| l | c | l ||} \hline -% \textbf{Failure} & \textbf{failure} & \textbf{Symptom} \\ -% \textbf{Scenario} & \textbf{effect} & \textbf{RADC } \\ \hline \hline - \textbf{Failure} & \textbf{Failure } & \textbf{Derived Component} \\ - \textbf{cause} & \textbf{Effect} & \textbf{Failure Mode} \\ - % - % + \textbf{Failure} & \textbf{Failure } & \textbf{Derived Component} \\ + \textbf{cause} & \textbf{Effect} & \textbf{Failure Mode} \\ +% \hline - 1: $RI_{VRGE}$ & voltage & $OUT\_OF\_$ \\ - & outside range & $RANGE$ \\ \hline + 1: $RI_{VRNGE}$ & voltage & $OUT\_OF\_$ \\ + & outside range & $RANGE$ \\ \hline + 2: $RI_{VAL\_ERR}$ & software fails & $VAL\_ERR$ \\ + post-condition fails & & \\ \hline % - 2: $RADC_{VV_ERR}$ & voltage & $VAL\_ERR$ \\ - & incorrect & \\ \hline \hline + 3: $RADC_{VV\_ERR}$ & voltage & $VAL\_ERR$ \\ + & incorrect & \\ \hline \hline % + 4: $RADC_{HIGH}$ & voltage value & $VAL\_ERR$ \\ + & incorrect & \\ \hline % - % - 3: $RADC_{HIGH}$ & voltage value & $VAL\_ERR$ \\ - & incorrect & \\ \hline + 5: $RADC_{LOW}$ & ADC low voltage & 
$OUT\_OF\_$ \\ + & so out of range & $RANGE$ \\ + & i.e. $< 0.88V$ & \\ % % -% - 4: $RADC_{LOW}$ & ADC low voltage & $OUT\_OF\_$ \\ - & so out of range & $RANGE$ \\ - & i.e. < 0.88V & \\ - \hline - % - 5: post condition fails & software fails & $VAL\_ERR$ \\ \hline -% \hline \hline % \end{tabular} \end{table} } - +% The failure symptoms for the {\fg} are $\{OUT\_OF\_RANGE, VAL\_ERR\}$. -The postcondition for the function \cf{read\_4\_20\_input}, {\em /* ensure: value is proportional (0-999) to the - 4 to 20mA input */} corresponds to the $VAL\_ERR$ and is already in the set of failure modes. +%The postcondition for the function \cf{read\_4\_20\_input}, {\em /* ensure: value is proportional (0-999) to the +% 4 to 20mA input */} corresponds to the $VAL\_ERR$ and is already in the set of failure modes. % \paragraph{Final Functional Group} For single failures these are the two ways in which this function -can fail. An $OUT\_OF\_RANGE$ will be flagged by the error flag variable. -The $VAL\_ERR$ will simply mean that the value read is incorrect. - -We can finally make a {\dc} to represent a failure mode model for our function $read\_4\_20\_input$. %thus: - +can fail. An $OUT\_OF\_RANGE$ condition will be flagged by the error flag variable. +The $VAL\_ERR$ will simply mean that the value read is incorrect: an undetectable +and therefore undesirable condition. +% +Finally a {\dc} is created to represent a failure mode model for our +combined hardware and software {\ft} input failure mode model. +This can be named $ R420I $, for {\em read {\ft} input}. +%function $read\_4\_20\_input$. %thus: +% % $$ R420I = \; \derivec(G_3) .$$ - -This new {\dc} has the following {\fms}: +% +This {\dc} has the following {\fms}: $$fm(R420I) = \{OUT\_OF\_RANGE, VAL\_ERR\} .$$ - +% % % Using the derived components, CMATV and VTPM we create % a new functional group. This % integrates FMEA's from software and eletronics % into the same failure mode model. 
- - - -We can now represent the software/hardware FMMD analysis +% +% +% +This software/hardware FMMD analysis is represented as a hierarchical diagram, see figure~\ref{fig:eulerswhw}. % see figure~\ref{fig:hd}. - +% % HTR 27OCT2012 % \begin{figure}[h] % HTR 27OCT2012 % \centering % HTR 27OCT2012 % \includegraphics[width=200pt]{./CH5_Examples/hd.png} @@ -691,7 +670,7 @@ as a hierarchical diagram, see figure~\ref{fig:eulerswhw}. % see figure~\ref{fig % HTR 27OCT2012 % \caption{FMMD hierarchy with hardware and software elements} % HTR 27OCT2012 % \label{fig:hd} % HTR 27OCT2012 % \end{figure} - +% \begin{figure}[h] \centering \includegraphics[width=300pt]{./CH5_Examples/eulerswhw.png} @@ -701,21 +680,27 @@ as a hierarchical diagram, see figure~\ref{fig:eulerswhw}. % see figure~\ref{fig and the inner two are electronic {\dcs}.} \label{fig:eulerswhw} \end{figure} - +% \subsection{Conclusion: {\ft} Reader Software/Hardware FMMD Model} - -The {\dc} representing the {\ft} reader -in software shows that by FMMD, we can integrate +% +The {\dc} representing the hybrid software and hardware {\ft} reader +demonstrates that FMMD can integrate software and electrical %electro-mechanical FMMD models. % With this analysis we have a complete `reasoning~path' linking the failures modes from the electronics to those in the software. -Each functional group to {\dc} transition represents a -reasoning stage. % -Each reasoning stage will have an associated analysis report. +Each functional group to {\dc} transition represents a +reasoning stage\footnote{Each of these reasoning stages, will have a reasoning distance +associated with it, and because {\fgs} are generally small %we can apply XFMEA +XFMEA can be applied +within those stages without undue state explosion problems.}. 
+% +Each reasoning stage will have an associated analysis report\footnote{Having an analysis report for each {\fg} +in a system analysed under FMMD, automatically provides a context sensitive documentation trail, improving +accessibility to anyone re-viewing or auditing the analysis.}. % With traditional FMEA methods the reasoning~distance is large, because it stretches from the component failure mode to the %top---or---system @@ -729,19 +714,24 @@ interfacing is usually treated as a separate FMEA task~\cite{sfmeainterface,embe We now have a {\dc} for a {\ft} input in software. Typically, more than one such input could be present in a real-world system. Not only have we integrated electronics and software in an FMEA, we can also -re-use the analysis for each {\ft} input in the system. +re-use the analysis for each {\ft} input. +%(i.e. in a typical system using this type of signalling +%we would often have several {\ft} inputs). The unsolved symptoms, or undetectable %unobservable errors, i.e. $VAL\_ERR$ could be addressed by another software function to read other known signals via the MUX (i.e. voltage references). This strategy would -detect ADC\_STUCK\_AT and MUX\_FAIL failure modes. - -A software specification for a hardware interface will concentrate on -how to interpret raw readings, or what signals to apply for actuators. -Using FMMD we can determine an accurate failure model for the interface as well~\cite{sfmeainterface}. - +detect ADC\_STUCK\_AT and MUX\_FAIL failure modes. Where the integrity of +the MUX is very demanding, separate pull down test lines may be implemented on the germane inputs as well. +% +A software specification for a hardware interface will typically concentrate on data formats, +how to interpret raw readings, or what digital signals to apply for actuators~\cite{sfmeainterface}. +Using FMMD the process naturally determines a failure model for the interface. % as well~\cite{sfmeainterface}. 
+\\ +\\ +The {\ft} example above is based on the paper presented to System Safety in 2012~\cite{syssafe2012}. % HTR == HATE TO REMOVE %HTR 18NOV2012 We can represent %the hierarchy in figure~\ref{fig:hd} algebraically, @@ -777,10 +767,13 @@ Temperature control is a first order differential problem, and is often addressed using the Proportional Integral Differential (PID) algorithm~\cite{dcods}[p.66]. % Traditionally this was performed in analogue electronics -with trimmer potentiometers providing the P,I and D parameters. -Since the introduction of micro-processors, it has been possible to +with trimmer potentiometers providing the P, I and D parameters. +% +Since the introduction of digital computers, it has been possible to implement PID in software. %pro-grammatically. -An FMMD analysis of a PID temperature controller is presented. %would mean an +% +A PID temperature controller is presented +as a complete example of an electronic/software hybrid analysed using FMMD. %would mean an %analysis of a realistic standalone system without being it becoming an un-wieldingly large task. % % \paragraph{The PID Temperature Control Algorithm.} % % PID control starts with a setpoint, or desired value for a process @@ -797,18 +790,19 @@ An FMMD analysis of a PID temperature controller is presented. %would mean an % \subsection{Design Stage: Implementation on a micro-controller.} When designing a computer program it is often useful to -start with a structured analysis `Yourdon' context diagram~\cite{Yourdon:1989:MSA:62004}, see figure~\ref{fig:context_diagram_PID}. - +start with a system overview. +A structured analysis `Yourdon' context diagram~\cite{Yourdon:1989:MSA:62004} is presented below, see figure~\ref{fig:context_diagram_PID}. 
+% \begin{figure}[h]+ \centering - \includegraphics[width=300pt]{./CH5_Examples/context_diagram_PID.png} + \includegraphics[width=400pt]{./CH5_Examples/context_diagram_PID.png} % context_diagram_PID.png: 818x324 pixel, 72dpi, 28.86x11.43 cm, bb=0 0 818 324 - \caption{Yourdon Context Diagram for PID Temperature Controller.} + \caption{Yourdon Context Diagram for a standalone micro-processor implemented PID Temperature Controller.} \label{fig:context_diagram_PID} \end{figure} - +% Using figure~\ref{fig:context_diagram_PID} we review the system in terms of its data flow, starting -with the data sources (the Pt100 inputs) and the data sinks (the heater output and the LED indicators). +with the data sources (the Pt100 temperature sensor inputs) and the data sinks (the heater output and the LED indicators). % We have two voltage inputs (see section~\ref{sec:Pt100}) from the Pt100 temperature sensor. For the Pt100 sensor, we will need to read the voltages it outputs and %for @@ -816,39 +810,55 @@ this will therefore require an ADC and MUX. % For the output, we can use a Pulse Width Modulator (PWM) (this is a common module found on micro-controllers -allowing a variable power output~\cite{aoe}[p.360]). PWM's ADC's and MUX's are commonly built into cheap micro-controllers~\cite{pic18f2523}[Ch.15]. +%allowing a +facilitating +variable power output~\cite{aoe}[p.360]). PWM's ADC's and MUX's are commonly built into cheap micro-controllers~\cite{pic18f2523}[Ch.15]. We refine the Yourdon diagram, with the afferent data flow coming through the MUX and ADC on the micro-controller, and the efferent -channelled through a PWM module, %again built into the micro-controller, +channelled through a PWM module. %again built into the micro-controller, % -and add more detail, see figure~\ref{fig:context_diagram2_PID}. +%and add more detail, see figure~\ref{fig:context_diagram2_PID}. 
\begin{figure}[h]+ \centering - \includegraphics[width=300pt]{./CH5_Examples/context_diagram2_PID.png} + \includegraphics[width=400pt]{./CH5_Examples/context_diagram2_PID.png} % context_diagram_PID.png: 818x324 pixel, 72dpi, 28.86x11.43 cm, bb=0 0 818 324 \caption{Yourdon Context Diagram for PID Temperature Controller.} \label{fig:context_diagram2_PID} \end{figure} -The Yourdon methodology allows us to zoom into data transform bubbles and analyse them in more -detail. +The Yourdon methodology allows us to zoom into data transform bubbles, analyse them in more +depth and create more paths and transform bubbles which further define the data flow and processing. % required. % -We define the controlling software, by looking at or zooming into its transform bubble. -We have the inputs and outputs from the software. -We refine the data flow within the software and thus define software functions. +This next stage of model refinement is shown in figure~\ref{fig:context_diagram2_PID}. +% +We define the controlling software, by looking at or zooming into transform bubbles +and refining them by adding detail. +% +Following the data streams through the process, additional transform bubbles are created as required. +% +The lines connecting the `transform~bubbles' define the data passed between them. +% +When the data transform analysis is finished, each transform bubble represents a software function. +% +Because the connecting lines define the data passed between transform bubbles, +the inputs and outputs of the associated software functions are also defined. +% +The Yourdon methodology thus allows the refinement and modelling +of a process from a data~flow perspective +defining software functions in its final stage. %, and %this in terms of software functions. % -We follow the data streams through the process, creating transform bubbles as required. 
In all `bare~metal'\footnote{`Bare~metal' is a term used to indicate a micro-processor -controlled system that does not use a traditional operating system.} +controlled system that does not use a traditional operating system. These are generally +coded in 'C' or assembly language and run immediately from power-up.} software architectures, we need a rudimentary operating system, often referred to as the `monitor'. % -We bear in mind that PID, because the algorithm depends heavily on integral calculus~\cite{dcods}[Ch.3.3] it is time sensitive +We bear in mind that PID, because the algorithm depends heavily on integral calculus~\cite{dcods}[Ch.3.3] is time sensitive and we therefore need to execute it at precise intervals determined by its proportional, integral and differential (PID) coefficients. % Most micro-controllers feature several general purpose timers~\cite{pic18f2523}. We can use an internal timer in conjunction with the monitor function -to call the PID algorithm at a regular time interval. % specified interval. +to call the PID algorithm at a regular and precise time interval. % specified interval. % \paragraph{Data flow model to programmatic call tree.} The Yourdon methodology also gives us a guide as to which software @@ -856,17 +866,23 @@ functions should be called to control the process, or in `C' terms be the main f % \begin{figure}[h] \centering - \includegraphics[width=300pt]{./CH5_Examples/context_software.png} + \includegraphics[width=400pt]{./CH5_Examples/context_software.png} % context_software.png: 1023x500 pixel, 72dpi, 36.09x17.64 cm, bb=0 0 1023 500 \caption{Context diagram of the software in the PID temperature controller} \label{fig:contextsoftware} \end{figure} +% Using figure~\ref{fig:contextsoftware} we can now pick the transform bubble we want to be the `main' or controlling function in the software. -This can be thought of as picking one bubble and holding it up. 
The other bubbles hang underneath +% +This can be thought of as picking one bubble and holding it up. +% +The other bubbles hang underneath forming the software call tree hierarchy, see figure~\ref{fig:context_calltree}. -From examining the diagram, and with common embedded programming practise, +% +From examining the diagram, and in common with established embedded programming practice, this is clearly going to be the monitor function. +% \begin{figure}[h]+ \centering \includegraphics[width=300pt]{./CH5_Examples/context_calltree.png} @@ -875,87 +891,93 @@ this is clearly going to be the monitor function. \label{fig:context_calltree} \end{figure} % - -\paragraph{Software Algorithm.} -The monitor function will orchestrate the control process. -Firstly it will examine the timer value, and when appropriate, call the PID function. -The PID function call -\cf{determine\_set\_point\_error} and that calls \cf{convert\_ADC\_to\_T} -which calls \cf{Read\_ADC} (the function developed in the earlier example) -which reads from physical hardware. % -With the set point error value the PID function will return - output control value to its calling -function (i.e. the PID -demand which will be returned to the monitor function). +\paragraph{Software Algorithm.} +% +The monitor function will orchestrate the control process. +% +Firstly it will examine the timer value and, when appropriate, call the \cf{PID} function. +% +The \cf{PID} function calls \cf{determine\_set\_point\_error} which calls \cf{convert\_ADC\_to\_T} +which in turn calls \cf{Read\_ADC} (the function developed in the earlier example) +which reads from hardware. +% +With the set point error value the \cf{PID} function will return an output control value to its calling +function (i.e. the PID demand which will be returned to the monitor function). % %On returning to the monitor function, it will return the PID demand value. The PID demand value will be applied via the PWM. 
+% We now have a rudimentary closed loop control system incorporating both hardware and software. % By using the Yourdon methodology we obtain a programmatic design frame-work i.e. a call tree structure. % We now have all the components, i.e. hardware elements and software functions that will be used in the temperature controller. +% We list these, and begin, from the bottom-up, to apply FMMD analysis. - +% \clearpage \subsection{FMMD Analysis of PID temperature Controller} - +% To summarise from the design stage, the electronic components identified thus far: \begin{itemize} - \item ADCMUX --- Electronics, analysed in previous example. - \item TIMER --- Internal micro controller timer - \item HEATER --- Heating element, essentially a resistor. - \item Pt100 --- Pt100 Temperature sensor, as analysed in section~\ref{sec:Pt100}. - \item PWM --- Internal micro controller pulse width modulation module - \item General Purpose I/O (GPIO) --- I/O used to drive LEDS %. %source LED current - \item LEDs --- Indication LEDs via GPIO - \item micro-controller --- the medium for running the software + \item ADCMUX --- Electronics, analysed in previous example, + \item TIMER --- Internal micro controller timer, + \item HEATER --- Heating element, essentially a resistor, + \item Pt100 --- Pt100 Temperature sensor, as analysed in section~\ref{sec:Pt100}, + \item PWM --- Internal micro controller pulse width modulation module, + \item General Purpose I/O (GPIO) --- I/O used to drive LEDS, %. %source LED current + \item LEDs --- Indication LEDs via GPIO, + \item micro-controller --- the medium for running the software. \end{itemize} - - - - - - +% \subsection{Temperature Controller Hardware Elements FMMD.} - -\paragraph{ACDMUX and Read\_ADC} -We re-use this derived component from section~\ref{readADC}. +% +\paragraph{ADCMUX and Read\_ADC.} +We re-use the {\dc} from section~\ref{readADC}. 
$$ fm(RADC) = \{ VV\_ERR, HIGH, LOW \} .$$ - - +% +% \paragraph{TIMER.} -The internal timer in use is a register which when read +The internal timer, from a programmer's perspective, is a register which, when read, returns an incremented time value. +% +Essentially it is a free-running integer counter with an interfacing register. +% Using two's complement mathematics, by subtracting the time we last read it, we can calculate the interval between readings (assuming the timer has not wrapped around more than once). +% We can say that a timer can fail by incrementing its value at an incorrect rate, or can stop incrementing. -$$ fm(TIMER) = \{ STOPPED, INCORRECT\_INTERVAL \}$$ - +% +We define the failure modes of $TIMER$ thus: +$$ fm(TIMER) = \{ STOPPED, INCORRECT\_INTERVAL \}.$$ +% \paragraph{HEATER.} A heating element is typically some configuration of resistive wire. -It therefore has the same failure modes as a resistor and we can state -$$fm(HEATER) = \{ OPEN, SHORT \}$$ - +It therefore has the same failure modes as a resistor: +$$fm(HEATER) = \{ OPEN, SHORT \} .$$ +% \paragraph{Pt100 Platinum Temperature Sensor.} -The Pt100 four wire configuration is analysed in section~\ref{sec:Pt100} -$$ fm(Pt100) = \{ OUT\_OF\_RANGE \} $$ - - +The Pt100 four wire configuration was analysed in section~\ref{sec:Pt100}, the {\dc} is re-used here: +$$ fm(Pt100) = \{ OUT\_OF\_RANGE \} . $$ +% +% \paragraph{PWM.} %The PWM, in use, is a hardware register written to with an integer value~\cite{pic182523}[Ch.15]. From a programmatic perspective a PWM output is a register to which software writes an unsigned magnitude value~\cite{pic18f2523}[Ch.15]. +% The PWM hardware module applies this using a mark space ratio proportional to that value, providing a means of varying the amount of power supplied. +% When the PWM action is halted, or fails, the digital output pin associated with it will typically be held in a high or low state.
+% We therefore state: $$ fm(PWM) = \{ HIGH, LOW \}.$$ @@ -966,46 +988,66 @@ At a minimum it would include a micro-processor with PROM and RAM general I/O and external interrupt lines. % Typically there are many other I/O modules incorporated (e.g. TIMERS, UARTS, PWM, ADC, ADCMUX, CAN). -In this project we are using the ADCMUX, TIMER, PWM and general purpose computing facilities. +% +In this project the ADCMUX, TIMER, PWM and general purpose computing facilities are used. +% We have to therefore consider the general~computing, CLOCK, PROM and RAM failure modes. $$fm (micro-controller) =\{ PROM\_FAULT, RAM\_FAULT, CPU\_FAULT, ALU\_FAULT, CLOCK\_STOPPED \}.$$ % \subsection{Temperature Controller Software Elements FMMD} Identified Software Components: \begin{itemize} - \item --- \cf{Monitor} (which calls PID algorithm and sets status LEDS) - \item --- \cf{PID} (which calls \cf{determine\_set\_point\_error} and \cf{output\_control}) - \item --- \cf{determine\_set\_point\_error} (which calls convert\_ADC\_to\_T) - \item --- \cf{convert\_ADC\_to\_T} (which calls read\_ADC which we can re-use from the last example) - \item --- \cf{read\_ADC} - \item --- \cf{output\_control} (which sets the PWM hardware according to the PID demand value) + \item --- \cf{Monitor} (which calls PID algorithm and sets status LEDS), + \item --- \cf{PID} (which calls \cf{determine\_set\_point\_error} and \cf{output\_control}), + \item --- \cf{determine\_set\_point\_error} (which calls convert\_ADC\_to\_T), + \item --- \cf{convert\_ADC\_to\_T} (which calls read\_ADC which we can re-use from the last example), + \item --- \cf{read\_ADC}, + \item --- \cf{output\_control} (which sets the PWM hardware according to the PID demand value). 
\end{itemize} -With the call tree structure defined (see figure~\ref{fig:context_calltree}), we can now analyse these -components from the bottom-up, starting with the afferent flow, the reading of the temperature and its conversion -to a PID calculated heater output demand. - +% +% +With the call tree structure defined (see figure~\ref{fig:context_calltree}), +we have a hierarchy compatible with FMMD for analysis. +However, it is only the top, the software, part of the hierarchy. +% +FMMD is a bottom-up process and we must start at the lowest level, the electronics. +% +The Yourdon context diagram (see figure~\ref{fig:context_diagram_PID}) is useful here as its data sources and sinks are +by definition the lowest levels in a system. +% +We can follow the input, or afferent data flow to find the bottom levels for system inputs +and the output, or efferent flow to find the bottom level for outputs/actuators etc. +% +Starting with the afferent flow, the reading of the temperature and its conversion +to a PID calculated heater output demand is examined. +% \subsubsection{Afferent flow FMMD analysis, Pt100, temperature, set point error, PID output demand.} -We start with the afferent flow from the Pt100. +Starting with the afferent data flow for the temperature readings, we find the lowest +level in the hierarchy, the Pt100 sensor. %with the software, and consider the hardware elements %used (if any) by each software function. Starting at the bottom, we form a {\fg} with the function \cf{read\_ADC} and the Pt100. -This gives us a {\dc} which we call -ReadPt100. +This gives us a {\dc}, %which we call +`Read\_Pt100'.
% % % -The {\dc} Read\_Pt100 is a failure mode model of the \cf{Read\_ADC} function and the Pt100 -hardware, and has the following failure modes: - +The {\dc} Read\_Pt100 is a failure mode model of the \cf{Read\_ADC} software function and the Pt100 +hardware, this has the following failure modes: +% $$ fm (Read\_Pt100) = \{ VOLTAGE\_HIGH, VAL\_ERR, VOLTAGE\_LOW \}. $$ - - -We move along the afferent flow, and we come to the \cf{convert\_ADC\_to\_T} function. -This will call \cf{Read\_ADC} twice, one for the high Pt100 value, again for the lower. % and once for to read a current sense. +% +% +Moving along the afferent flow, the \cf{convert\_ADC\_to\_T} function is next up the hierarchy. +% +This will call \cf{Read\_ADC} twice, once for the high Pt100 value, again for the lower. % and once for to read a current sense. +% We then, calculate the resistance of the Pt100 element, and with this---using a polynomial or a lookup table~\cite{eurothermtables}---calculate the temperature. +% \fmmdglossCONTRACTPROG +% The pre-conditions for the function are that: \begin{itemize} % \item The current calculated is within pre-defined bounds i.e. Pt100\_current, @@ -1014,47 +1056,68 @@ The pre-conditions for the function are that: \item The lower and higher values agree to within a given tolerance i.e. Pt100\_high\_low\_mismatch. \end{itemize} Any violation of these pre-conditions is equivalent to a failure mode. -Note that a temperature outside the pre-defined range will also cause these errors. -The postcondition is that it returns a temperature within a given tolerance to the temperature at the sensor. +% +Note that a temperature outside the pre-defined range would be detected as an acceptable voltage +failure. +% +The post-condition is that it returns a temperature within a given tolerance to the temperature at the sensor. +% A failure of this post-condition can be termed temp\_incorrect. 
+% \clearpage We apply FMMD to the {\fg} formed by \cf{Read\_Pt100} and the function \cf{convert\_ADC\_to\_T}. -We can call the resulting {\dc} \cf{Get\_Temperature}. This analysis is presented in table~\ref{tbl:gettemperature}. -The analysis is presented in table~\ref{tbl:readPt100}. - - - -We collect the failure symptoms for the {\dc} Get\_Temperature and can state: - -$$fm(Get\_Temperature) = \{ Pt100\_out\_of\_range, temp\_incorrect \}$$ +% +We can call the resulting {\dc} {Get\_Temperature}. This analysis is presented in table~\ref{tbl:gettemperature}. +% +The analysis for the Pt100 circuit is presented in table~\ref{tbl:readPt100}. +% +% +Failure symptoms are collected and the {\dc} created with the following failure modes: +% +% +$$fm(Get\_Temperature) = \{ Pt100\_out\_of\_range, temp\_incorrect \} . $$ \clearpage - -Following the afferent flow further, we come to a function to determine the control error value. -This is simply the target temperature subtracted from the measured. -We thus form a {\fg} with our newly {\dc} Get\_Temperature +% +% +Following the afferent flow further, the function to determine the control error value is examined. +% +This is simply the target temperature subtracted from that measured by the sensor. +% +A {\fg} is formed with our newly {\dc} Get\_Temperature and the function \cf{determine\_set\_point\_error}. % The pre-condition for \cf{determine\_set\_point\_error} is that the temperature read by it -is accurate, and its post condition is to return the correct control error value. -Most failure modes from a Pt100 are observable. -We can divide the post condition into two variants, a known incorrect error value, KnownIncorrectErrorValue -where we can detect the Pt100 value is suspect, and IncorrectErrorValue where we simply have -an incorrect error value. This analysis is presented in table~\ref{tbl:geterror}. 
- - - - -We collect failure mode symptoms, and can create a new {\dc} GetError -where +is accurate, and its post-condition is to return the correct control error value. +% +All single failure modes from a four wire Pt100 sensor are detectable (see section~\ref{sec:singlePt100FMEA}). +% +For most practical purposes this would suffice, but for the purpose of example +a particular double failure scenario, potentially giving an undefined value is +considered (see section~\ref{sec:Pt100floating}). +% +The post-condition thus has two variants, an incorrect value that is detected, KnownIncorrectErrorValue +%where we can detect the Pt100 value is suspect, +and IncorrectErrorValue where we simply have +an incorrect value but cannot determine this. +% +This analysis is presented in table~\ref{tbl:geterror}. +% +% +% +% +Failure mode symptoms are collected and a new {\dc} GetError created +where: $$fm(GetError) = \{ KnownIncorrectErrorValue, IncorrectErrorValue \}.$$ - - -We now follow the afferent path to the PID algorithm. -Here we assume that the PID constants are fixed (i.e. are not parameters). -We use the $GetError$ {\dc} and the PID function to form a {\fg}. -The pre-condition for the \cf{PID} function is that % are that it is called -%iat the correct frequency and that +% +Following the afferent path the PID algorithm is next in the software call tree. +% +%Here we assume that the PID constants are fixed (i.e. are not parameters). +% +The $GetError$ {\dc} and the \cf{PID} function form a {\fg}. +% +The pre-condition for the \cf{PID} function is that it receives the correct error value. +% The post-condition is that it outputs correct control values. % RESP FOR TIMEING IS ON CALLING FUNCTION AND IS A SEPARATE ERROR- TGHINK ABOUT JITTER..... % and controll values..... 
Jitter might not matter, wrong int times would @@ -1064,25 +1127,25 @@ All digital signal processing algorithms are sensitive to calling frequency, and Were this function to be called at an incorrect rate, its output could be erroneous (the differential and integral parameters would effectively have been changed). % -However this problem is a failure mode for the function calling it i.e. the context of use (see section~\ref{sec:subjectiveobjective}) . -% discussion, the subjective -% being the context the {\dc} is used for/in, and the objective -% being the logic and process of the failure mode analysis. +However this problem is a failure mode for the consideration of the function calling it i.e. the context of use (see section~\ref{sec:subjectiveobjective}). % -The calling function sets the context for the PID algorithm (i.e. what it is used for). +That is, the \cf{PID} function is called, and the calling function is responsible for the timing, +or in more general terms +it is the calling function that sets the context for the \cf{PID} function (i.e. what it is used for). %If this PID were to be used, say as some form of low pass filter, we could consider jitter %for instance. % %In a control environment with PID, jitter would not be a significant factor. % %HARK THE HERALD ANGELS SING... HARK???? 
- - - -We now create a PID {\dc}, with the following failure modes: - +% +% +% +The {\dc} PID is created, with the following failure modes: +% $$ fm(PID) = \{ KnownControlValueErrorV, IncorrectControlErrorV \} .$$ - +% +% \begin{figure}[h] \centering \includegraphics[width=400pt]{./CH5_Examples/euler_afferent_PID.png} @@ -1090,33 +1153,45 @@ $$ fm(PID) = \{ KnownControlValueErrorV, IncorrectControlErrorV \} .$$ \caption{Euler diagram representing the hierarchy of FMMD analysis applied to the afferent branch of call tree for the PID temperature controller example.} \label{fig:euler_afferent_PID} \end{figure} - - - -We have now modelled the software call tree for the afferent flow; we represent this as an Euler diagram in figure~\ref{fig:euler_afferent_PID}. +% +% +% +The software call tree for the afferent flow has now been modelled using FMMD; +this is represented as an Euler diagram in figure~\ref{fig:euler_afferent_PID}. Two call tree branches remain. The LED indication branch and the PWM/heater output. - +% \subsubsection{Efferent flow, PID demand value to PWM output} - +% The monitor function calls the \cf{output\_control} function with the PID demand. +% The \cf{output\_control} function then sets the PWM hardware register, which causes the mark space output of the PWM module to -apply the demanded power. We form a {\fg} with the Heating element, a PWM module and the output\_control function to model this branch -of the efferent flow. We apply FMMD analysis to this {\fg} in table~\ref{tbl:heateroutput}. -For the \cf{output\_control} function, we have a pre-condition that the PWM module is +apply the demanded power. +% +A {\fg} with the Heating element, a PWM module and the \cf{output\_control} function is formed to model this branch +of the efferent flow. +% +FMMD analysis is applied to this {\fg} in table~\ref{tbl:heateroutput}. 
+% +For the \cf{output\_control} function, there is a pre-condition that the PWM module is configured and working, and has the correct clock frequency. % A second pre-condition is that the heating element is connected and working. -The post condition is that it sets the correct value into the PWM register +% +The post-condition is that it sets the correct value into the PWM register to implement the power output demand. - - -We now create a {\dc} called HeaterOutput +% +% +% +A {\dc} is created called HeaterOutput with the following failure modes: -$$fm(HeaterOutput) = \{ HeaterOnFull, HeaterOff, HeaterOutputIncorrect \}$$ - - - +$$fm(HeaterOutput) = \{ HeaterOnFull, HeaterOff, HeaterOutputIncorrect \} .$$ +% +As an aside: the $HeaterOnFull$ failure should raise alarm bells for a designer +and, upon its discovery, measures may be recommended to inhibit this (such as perhaps +adding a safety relay to cut the power to the heater). +% +% \begin{figure}[h] \centering \includegraphics[width=300pt]{./CH5_Examples/euler_heater_output.png} @@ -1124,12 +1199,12 @@ $$fm(HeaterOutput) = \{ HeaterOnFull, HeaterOff, HeaterOutputIncorrect \}$$ \caption{Euler diagram showing HeaterOutput with its two hardware components, PWM and HEATER, and its software component output\_control.} \label{fig:eulerheateroutput} \end{figure} - - - - +% +% +% +% \subsubsection{Efferent flow: LED status LEDs} - +% The status LEDS will be controlled by general purpose (GPIO) I/O pins. % We could have, three LEDS, one flashing with a human readable mark @@ -1143,16 +1218,16 @@ would indicate to the operator that an error had occurred. The pre-condition for this function is that the GPIO is connected to working LEDS. % -The post condition is that the function \cf{setLEDS} will supply correct indication by flashing the LEDs.
+% +A {\fg} is formed from the GPIO, the LEDs and the software function \cf{setLEDs}. +% +FMMD analysis is applied to this {\fg} in table~\ref{tbl:ledoutput}. +% +% +% % -We form a {\fg} from the GPIO, the LEDs and the software function \cf{setLEDs}. % -We apply FMMD analysis to this {\fg} in table~\ref{tbl:ledoutput}. - - - - - \begin{figure}[h] \centering \includegraphics[width=300pt]{./CH5_Examples/euler_led_output.png} @@ -1161,86 +1236,94 @@ We apply FMMD analysis to this {\fg} in table~\ref{tbl:ledoutput}. and its software component setLEDS.} \label{fig:eulerheateroutput} \end{figure} - - +% +% Our {\dc} for the setLED function, GPIO and LEDs has the following failure modes: $$ fm(LEDoutput) = \{FailureIndicated, IndicationError \} $$ - - +% +% \subsubsection{Final Analysis Stage: PID Temperature Controller} - -The possibility of each software function failing its post condition without a direct +% +The possibility of each software function failing its post-condition without a direct underlying cause from one of its components has been included in each analysis stage -involving software. This is because software introduces the possibility of -anything going wrong! The common causes for software failing are: +involving software. +% +This is because software introduces the possibility of +anything going wrong! +% +The common causes for software failing are: \begin{itemize} \item Value/RAM corruption typically from interrupt contention problems~\cite{concurrency_c_tool} or accidental over writing~\cite{swseatbelt}, but can be from external sources such as radiation changing bits/values at runtime~\cite{5963919, 5488118}; \item Address bus errors leading to program errors (program sequence); \item ROM memory failures; \item Unintended behaviour of software. + \item Electro Magnetic Compatibility (EMC) interference. 
\end{itemize} Because the software is running on a medium, that of the processor or micro-controller, -our design at the final or highest level (see table~\ref{tbl:pid}), must include all possible failure modes of this medium i.e. +the FMMD analysis at the final or highest level (see table~\ref{tbl:pid}), must include all possible failure modes of this medium i.e. $$fm (micro-controller) =\{ PROM\_FAULT, RAM\_FAULT, CPU\_FAULT, ALU\_FAULT, CLOCK\_STOPPED \}.$$ -We perform the final FMMD stage by forming a functional group with the {\dcs} +The final FMMD stage forms a {\fg} with the {\dcs} determined previously: % \begin{itemize} - \item PID - \item HeaterOutput - \item LEDoutput - \item and the function `monitor'. + \item PID, + \item HeaterOutput, + \item LEDoutput, + \item the function \cf{monitor}. \end{itemize} - -The post condition for the monitor function is that it implements the PID control task correctly. +% +The post-condition for the monitor function is that it implements the PID control task correctly. \fmmdglossCONTRACTPROG We can now create a {\dc} for the standalone temperature controller, and give it the name TempController. It will have the following failure modes: - +% $$fm ( TempController ) = \{ ControlFailureIndicated, ControlFailure, \\ KnownIndicationError, UnknownIndicationError \}.$$ - - -We can now represent this failure mode analysis as an Euler diagram, see figure~\ref{fig:euler_temp_controller}. - - +% +% +The failure mode analysis of the complete PID controller is represented +as an Euler diagram in figure~\ref{fig:euler_temp_controller}. 
+% +% \begin{figure}[h] \centering - \includegraphics[width=300pt]{./CH5_Examples/euler_temp_controller.png} + \includegraphics[width=400pt]{./CH5_Examples/euler_temp_controller.png} % euler_temp_controller.png: 714x251 pixel, 72dpi, 25.19x8.85 cm, bb=0 0 714 251 \caption{Euler diagram of the temperature controller final anaysis stage, showing the hybrid software/hardware {\dcs} and the function at the head of the call tree `monitor'.} \label{fig:euler_temp_controller} \end{figure} - +% \subsection{Conclusion: Standalone system, PID Temperature Controller} - +% The PID temperature control example above, shows that complete hybrid software/electronic systems can be modelled using FMMD. % -The analysis has revealed system level failure modes that are un-handled and some that are unobservable, -but using the FMMD analysis we can trace to the low level modules that are the cause of unobservable +The analysis has revealed system level failure modes that are un-handled and some that are undetectable, +but using the results from FMMD analysis we can trace down to the low level modules that are the cause of problematic failure modes. \fmmdglossOBS % This means that by using FMMD, we can identify the sub-systems which require -re-design to eliminate unobservable failure modes. +re-design to eliminate or reduce the likelihood of undetectable failure modes. +% The demands of EN61508~\cite{en61508} for minimum safe failure fraction thresholds~\cite{scsh}[p.52] associated with SIL levels, make this a desirable feature of any FMEA based methodology. % For the failure modes caused -by electronics we can apply reliability statistics. +by electronics we can apply reliability statistics, and possibly use higher rated +components instead of expensive re-design. % For software errors, we could, if necessary provide extra functions to provide self checking. 
We could follow EN61508 high reliability software measures such as duplication of functions with checking functions arbitrating them (diverse programming~\cite{en61508}[C.3.5]). % We could for instance validate the processor clocking with an external watchdog and a simple -communications protocol. For PROM and RAM faults we can implement measures such as checksums +communications protocol. For PROM and RAM faults we can implement measures such as run-time checksums and ram complement checking. % -Using FMMD on these extra safety measures we can ensure no single failure could lead to a -system failure, something impossible with current FMEA techniques. +Using FMMD in conjunction with extra safety measures we can ensure no single failure could lead to a +system failure, something difficult to prove with current FMEA techniques. diff --git a/submission_thesis/colophon/copy.tex b/submission_thesis/colophon/copy.tex index 2664eac..e56db7b 100644 --- a/submission_thesis/colophon/copy.tex +++ b/submission_thesis/colophon/copy.tex @@ -19,11 +19,11 @@ the University of Brighton, pushing me forward in clarity of self-expression, precision through mathematics, critical assessment and carefully crafted English: its members will always remain dear to me. % -%%%% IS THIS BIT A BIT MAD???? -Like an army recruits training Sergeant Major I found them -hard task masters at first, and then, as with realising the rationale behind training and -{\em even} parade drill, respected and grew to like them. -% +%%%% IS THIS BIT A BIT MAD???? YES! 27AUG2013 +% % % Like an army recruits training Sergeant Major I found them +% % % hard task masters at first, and then, as with realising the rationale behind training and +% % % {\em even} parade drill, respected and grew to like them. +% % % % % My first debt of gratitude must go to my supervisors, Dr. A. 
Fish, diff --git a/submission_thesis/style.tex b/submission_thesis/style.tex index 4e167de..d7ade01 100644 --- a/submission_thesis/style.tex +++ b/submission_thesis/style.tex @@ -9,6 +9,7 @@ \newcommand{\ft}{\ensuremath{4\!\!\rightarrow\!\!20mA} } +\newcommand{\tenfifty}{\ensuremath{10\!\!\rightarrow\!\!50mA} } \usepackage{graphicx} \usepackage{fancyhdr} \usepackage{tikz}