diff --git a/mybib.bib b/mybib.bib index b1e5c9b..d886fcb 100644 --- a/mybib.bib +++ b/mybib.bib @@ -121,6 +121,37 @@ Database keywords = "fault-tolerance" } +@article{syssafe2011, + title = "Developing a rigorous bottom-up modular static failure modelling methodology", + journal = "6th IET International Conference on System Safety, 2011", + volume = "", + number = "", + pages = "", + year = "2011", + note = "6th IET International Conference on System Safety, 2011", + issn = "", + doi = "", + url = "", + author = "R. P. Clark and others", + keywords = "Failsafe", + keywords = "EN298", + keywords = "gas-safety", + keywords = "burner", + keywords = "control", + keywords = "fault", + keywords = "double-fault", + keywords = "single-fault", + keywords = "fault-tolerance" +} + +Developing a rigorous bottom-up modular static failure modelling methodology +Author: + +Clark, R +Publication: + +6th IET International Conference on System Safety, 2011 + @ARTICLE{ontfmea, AUTHOR = "Lars Dittman et all", TITLE = "FMEA using Ontologies", @@ -225,6 +256,13 @@ Database YEAR = "1992" } +@BOOK{dbcbe, + AUTHOR = "R. 
Mitchell", + TITLE = "Design By Contract by Example", + PUBLISHER = "Addison-Wesley", + YEAR = "2002" +} + @BOOK{opmanage, AUTHOR = "Roger Schroeder", TITLE = "Operations Management: Contemporary Concepts and Cases ISBN: 978-0073403380", diff --git a/papers/fmmd_software_hardware/fmmdh.dia b/papers/fmmd_software_hardware/fmmdh.dia index 6f3f040..eb21d8f 100644 Binary files a/papers/fmmd_software_hardware/fmmdh.dia and b/papers/fmmd_software_hardware/fmmdh.dia differ diff --git a/papers/fmmd_software_hardware/hd.dia b/papers/fmmd_software_hardware/hd.dia index 9d8a730..f0acfbe 100644 Binary files a/papers/fmmd_software_hardware/hd.dia and b/papers/fmmd_software_hardware/hd.dia differ diff --git a/papers/fmmd_software_hardware/software_fmmd.tex b/papers/fmmd_software_hardware/software_fmmd.tex index b86975a..8a7d310 100644 --- a/papers/fmmd_software_hardware/software_fmmd.tex +++ b/papers/fmmd_software_hardware/software_fmmd.tex @@ -99,9 +99,9 @@ failure mode of the component or sub-system}}} \setlength{\topmargin}{0in} \setlength{\headheight}{0in} \setlength{\headsep}{0in} -%\setlength{\textheight}{22cm} +\setlength{\textheight}{22cm} \setlength{\textwidth}{18cm} -\setlength{\textheight}{24.35cm} +%\setlength{\textheight}{24.35cm} %\setlength{\textwidth}{20cm} \setlength{\oddsidemargin}{0in} \setlength{\evensidemargin}{0in} @@ -120,8 +120,8 @@ failure mode of the component or sub-system}}} % {subsection}{2}{0mm}% % {-\baslineskip} % {0.5\baselineskip} -% {\normalfont\normalsize\itshape}} -\linespread{0.6} +% {\normalfont\normalsize\itshape}}% +\linespread{0.95} \begin{document} %\pagestyle{fancy} @@ -155,23 +155,37 @@ failure mode of the component or sub-system}}} %endurance and Electro Magnetic Compatibility (EMC) testing. Theoretical, or 'static testing', %is often also required. % -Failure Mode Effects Analysis (FMEA), is a bottom-up technique that aims to assess the effect all -component failure modes on a system. 
-It is used both as a design tool (to determine weaknesses), and is a requirement of certification of safety critical products. -FMEA has been successfully applied to mechanical, electrical and hybrid electro-mechanical systems. +%Failure Mode Effects Analysis (FMEA), is a bottom-up technique that aims to assess the effect all +%component failure modes on a system. +%It is used both as a design tool (to determine weaknesses), and is a requirement of certification of safety critical products. +%FMEA has been successfully applied to mechanical, electrical and hybrid electro-mechanical systems. % -Work on software FMEA (SFMEA) is beginning, but -at present no technique for SFMEA that -integrates hardware and software models % known to the authors -exists. +%Work on software FMEA (SFMEA) is beginning, but +%at present no technique for SFMEA that +%integrates hardware and software models % known to the authors +%exists. % % + +% +%Failure modes in components in say a sensor, could be traced +%up through the electronics and then through the controlling software. +% +%Presently Failure Mode Effects Analysis (FMEA), stops at the glass ceiling of the computer program. +% +This paper presents a modular variant of Failure Mode Effects Analysis (FMEA), +Failure Mode Modular De-Composition (FMMD), a methodology which +can be applied to software, and is compatible +and integrable with FMMD performed on mechanical and electronic systems. +% Software generally sits on top of most modern safety critical control systems and defines its most important system wide behaviour and communications. +% Currently standards that demand FMEA for hardware (e.g. EN298, EN61508), do not specify it for software, but instead specify, good practise, review processes and language feature constraints. % This is a weakness. 
+% Where FMEA % scientifically traces component {\fms} to resultant system failures, software has been left in a non-analytical @@ -180,16 +194,9 @@ limbo of best practises and constraints. If software and hardware integrated FMEA were possible, electro-mechanical-software hybrids could be modelled, and could thus be `complete' failure mode models. % -Failure modes in components in say a sensor, could be traced -up through the electronics and then through the controlling software. -% -Presently FMEA, stops at the glass ceiling of the computer program. -% -This paper presents a modular variant of FMEA, Failure Mode Modular De-Composition (FMMD), a methodology which -can be applied to software, and is compatible -and integrable with FMMD performed on mechanical and electronic systems. +Presently FMEA, stops at the glass ceiling of the computer program: FMMD seeks to address +this, and offers additional test efficiency benefits. } - %\today \nocite{en298} \nocite{en61508} @@ -246,15 +253,17 @@ is a cause for criticism~\cite{safeware}. \subsection{Current work on Software FMEA} -Work on SFMEA usually does not seek to integrate +SFMEA usually does not seek to integrate hardware and software models, but to perform FMEA on the software in isolation~\cite{procsfmea}. -Some work has been performed using databases +% +Work has been performed using databases to track the relationships between variables -and system failure modes~\cite{procsfmeadb}, work has been performed to -introduce automation into the FMEA process~\cite{appswfmea} and code analysis +and system failure modes~\cite{procsfmeadb}, to %work has been performed to +introduce automation into the FMEA process~\cite{appswfmea} and to provide code analysis automation~\cite{modelsfmea}. 
Although the SFMEA and hardware FMEAs are performed separately -some schools of thought aim for FTA~\cite{nasafta,nucfta} (top down - deductive) and FMEA (bottom-up inductive) +some schools of thought aim for Fault Tree Analysis (FTA)~\cite{nasafta,nucfta} (top down - deductive) +and FMEA (bottom-up inductive) to be performed on the same system to provide insight into the software hardware/interface~\cite{embedsfmea}. % @@ -267,9 +276,11 @@ through the top (and therefore ultimately controlling) layer of software. The main FMEA methodologies are all based on the concept of taking base component {\fms}, and translating them into system level events/failures~\cite{sfmea,sfmeaa}. +% In a complicated system, mapping a component failure mode to a system level failure -will mean a long reasoning distance; that is to say the actions of the failed component will have to be traced through -several sub-systems and the effects of other components on the way. +will mean a long reasoning distance; that is to say the actions of the +failed component will have to be traced through +several sub-systems, gauging its effects with other components. % With software at the higher levels of these sub-systems, we have yet another layer of complication. @@ -296,7 +307,9 @@ failure mode model for it, modelling the software to hardware interface becomes far simpler. % The failure mode model, would give us the ways in which the signal conditioning -and multiplexer could fail. We can use this to work out how our software +and multiplexer could fail. +% +We can use this to work out how our software could fail, and with this create a modular FMEA model of the software. @@ -305,9 +318,9 @@ could fail, and with this create a modular FMEA model of the software. In outline, in order to modularise FMEA, we must create small modules from the bottom-up. We can do this by taking collections of base~components that -perform (ideally) a simple and well defined task. 
+perform (ideally) a simple and well defined task; we call these collections {\fgs}. % -We can call these {\fgs}. We can then analyse the failure mode behaviour of a {\fg} +We can then analyse the failure mode behaviour of a {\fg} using all the failure modes of all its components. % When we have its failure mode behaviour, or the symptoms of failure from the perspective of the {\fg}, @@ -375,8 +388,9 @@ of the {\fg} from which it was derived. % in a specific configuration. This specific configuration corresponds to % a {\fg}. Our use of it as a building block corresponds to a {\dc}. -We can use the symbol `$\derivec$' to represent the creation of a derived component -from a {\fg}. This symbol is convenient for drawn hierarchy diagrams. % (see figure~\ref{fmmdh}). +We use the symbol `$\derivec$' to represent the creation of a derived component +from a {\fg}. This symbol is convenient for drawn hierarchy diagrams. +% % (see figure~\ref{fmmdh}). We define the $\derivec$ function, where $\FG$ is the set of all {\fgs} and $\DC$ is the set of all {\dcs}, $ \derivec ( {\FG} ) \mapsto {\DC} .$ We show an FMMD hierarchy in figure~\ref{fig:fmmdh}. @@ -395,21 +409,37 @@ Now that we have {\dcs}, we can use them to form a higher level functional group We apply the same FMEA process to this and can derive a top level derived component (which has the system---or top---level failure modes). 
-\begin{figure} +\begin{figure}[h] \centering - \includegraphics[width=150pt]{./fmmdh.png} - % fmmdh.png: 365x405 pixel, 72dpi, 12.88x14.29 cm, bb=0 0 365 405 + \includegraphics[width=150pt,keepaspectratio=true]{./fmmdh.png} + % fmmdh.png: 256x289 pixel, 72dpi, 9.03x10.20 cm, bb=0 0 256 289 \caption{FMMD Hierarchy} \label{fig:fmmdh} \end{figure} +% \begin{figure}[h] +% \centering +% \includegraphics[width=120pt,keepaspectratio=true]{./fmmdh.png} +% % fmmdh.png: 256x289 pixel, 72dpi, 9.03x10.20 cm, bb=0 0 256 289 +% \caption{FMMD Hierarchy} +% \label{fig:fmmdh} +% \end{figure} + +% \begin{figure} +% \centering +% \includegraphics[width=150pt]{./fmmdh.png} +% % fmmdh.png: 365x405 pixel, 72dpi, 12.88x14.29 cm, bb=0 0 365 405 +% \caption{FMMD Hierarchy} +% \label{fig:fmmdh} +% \end{figure} + Note the diagram of the FMMD hierarchy is very similar to a simple non-recursive programmatic function call tree. \section{Software: How can we apply FMEA} -If FMEA can be applied to software we can build complete failure models -of typical modern safety critical systems. +%If FMEA can be applied to software we can build complete failure models +%of typical modern safety critical systems. With modular FMEA i.e. FMMD %(FMMD) we have the concepts of failure~modes of components, {\fgs} and symptoms of failure for a functional group. @@ -417,7 +447,9 @@ of components, {\fgs} and symptoms of failure for a functional group. A programmatic function has similarities with a {\fg} as defined by the FMMD process. % An FMMD {\fg} is placed into a hierarchy. +% A software function is placed into a hierarchy, that of its call-tree. +% A software function typically calls other functions and uses data sources via hardware interaction, which could be viewed as its `components'. It has outputs, i.e. it can perform actions on data or hardware @@ -431,13 +463,14 @@ and the hardware from which it reads values.% from. % Its outputs are the data it changes, or the hardware actions it performs. 
-When we have analysed a software function---using failure conditions -of its inputs as failure modes---we can -determine its symptoms of failure (i.e. how calling functions will see its failure mode behaviour). +When we have analysed a software function---treating failure conditions +of its inputs as `{\fms}'---we can +determine its symptoms of failure. % (i.e. how calling functions will see its failure mode behaviour). % We can thus apply the $\derivec$ function to software functions, by viewing them in terms of their failure -mode behaviour. To simplify things as well, software already fits into a hierarchy. -For Electronics and Mechanical systems, although we may be guided by the original designers +mode behaviour. To simplify things, software already fits into a hierarchy. +% +For electronic and mechanical systems, although we may be guided by the original designers' concepts of modularity and sub-systems in design, applying FMMD means deciding on the members for {\fgs} and the subsequent hierarchy. With software already written, that hierarchy is fixed/given. @@ -452,7 +485,7 @@ and the subsequent hierarchy. With software already written, that hierarchy is f \subsection{Software, a natural hierarchy} Software written for safety critical systems is usually constrained to -be modular~\cite{en61508}[3] and non recursive~\cite{misra}[15.2]. %{iec61511}. +be modular~\cite[vol.~3]{en61508} and non-recursive~\cite[15.2]{misra}. %{iec61511}. Because of this we can assume a direct call tree. % Functions call functions @@ -477,7 +510,7 @@ Contract programming is a discipline~\cite{dbcbe} for building software function and traceable way. Each function is subject to pre-conditions (constraints on its inputs), post-conditions (constraints on its outputs) and function wide invariants (rules). 
% -\paragraph{Mapping contract `pre-condition' violations to failure modes} +%\paragraph{Mapping contract `pre-condition' violations to failure modes.} % A precondition, or requirement for a contract software function defines the correct ranges of input conditions for the function @@ -486,13 +519,14 @@ to operate successfully. For a software function, a violation of a pre-condition is in effect a failure mode of `one of its components'. % -\paragraph{Mapping contract `post-condition' violations to symptoms} +%\paragraph +%{Mapping contract `post-condition' violations to symptoms} % A post condition is a definition of correct behaviour by a function. A violated post condition is a symptom of failure of a function. Post conditions could be either actions performed (i.e. the state of hardware changed) or an output value of a function. % -\paragraph{Mapping contract `invariant' violations to symptoms and failure modes} +%\paragraph{Mapping contract `invariant' violations to symptoms and failure modes} % Invariants in contract programming may apply to inputs to the function (where they can be considered {\fms} in FMMD terminology), and to outputs (where they can be considered {failure symptoms} in FMMD terminology). @@ -619,7 +653,8 @@ Its job is to select the correct channel (ADC multiplexer) and then to initiate conversion by setting an ADC 'go' bit (see code sample in figure~\ref{fig:code_read_ADC}). % It takes the raw ADC reading and converts it into a -floating point\footnote{the type, `double' or `double precision', is a standard C language floating point type~\cite{kandr}.} +floating point\footnote{the type, `double' or `double precision', is a +standard C language floating point type~\cite{DBLP:books/ph/KernighanR88}.} voltage value. @@ -825,7 +860,7 @@ We now analyse this hardware/software combined {\fg}. 
& read & \\ \hline 2: ${VREF}$ & ADC volt-ref & $VV\_ERR$ \\ - & incorrect & \\ \hline \hline + & incorrect & \\ \hline @@ -896,8 +931,7 @@ software component $read\_4\_20\_input$, i.e. $G_3 = \{read\_4\_20\_input, RADC\ & outside range & $RANGE$ \\ \hline 2: $RADC_{VV_ERR}$ & voltage & $VAL\_ERR$ \\ - & incorrect & \\ \hline \hline - + & incorrect & \\ \hline 3: $RADC_{HIGH}$ & voltage value & $VAL\_ERR$ \\ @@ -934,33 +968,41 @@ $fm(R420I) = \{OUT\_OF\_RANGE, VAL\_ERR\} .$ We can now represent the software/hardware FMMD analysis as a hierarchical diagram, see figure~\ref{fig:hd}. +% \begin{figure}[h] +% \centering +% \includegraphics[width=60pt]{./hd.png} +% % hd.png: 363x520 pixel, 72dpi, 12.81x18.34 cm, bb=0 0 363 520 +% \caption{FMMD hierarchy with hardware and software elements} +% \label{fig:hd} +% \end{figure} + \begin{figure}[h] \centering - \includegraphics[width=60pt]{./hd.png} - % hd.png: 363x520 pixel, 72dpi, 12.81x18.34 cm, bb=0 0 363 520 - \caption{FMMD hierarchy with hardware and software elements} + \includegraphics[width=150pt,keepaspectratio=true]{./hd.png} + % hd.png: 416x381 pixel, 72dpi, 14.68x13.44 cm, bb=0 0 416 381 + \caption{FMMD Hierarchy for {\ft} input} \label{fig:hd} \end{figure} -We can represent the hierarchy in figure~\ref{fig:hd} algebraically, using the `$\derivec$' function -using the groups as intermediate stages: -% \begin{eqnarray*} -% G_1 &=& \{R,ADC\} \\ -% CMATV &=& \;\derivec (G_1) \\ -% G_2 &=& \{CMATV, read\_ADC \} \\ -% RADC &=& \; \derivec (G_2) \\ -% G_3 &=& \{ RADC, read\_4\_20\_input \} \\ -% R420I &=& \; \derivec (G_3) \\ -% \end{eqnarray*} -%or, -with a nested definition, -$ \derivec \Big( \derivec \big( \derivec(R,ADC), read\_4\_20\_input \big), read\_4\_20\_input \Big). $ -% -This nested structure means that we have multiple traceable -stages of failure mode reasoning in our analysis. Traditional FMEA would have only one stage -of reasoning for each component failure mode. 
+% We can represent the hierarchy in figure~\ref{fig:hd} algebraically, using the `$\derivec$' function +% using the groups as intermediate stages: +% % \begin{eqnarray*} +% % G_1 &=& \{R,ADC\} \\ +% % CMATV &=& \;\derivec (G_1) \\ +% % G_2 &=& \{CMATV, read\_ADC \} \\ +% % RADC &=& \; \derivec (G_2) \\ +% % G_3 &=& \{ RADC, read\_4\_20\_input \} \\ +% % R420I &=& \; \derivec (G_3) \\ +% % \end{eqnarray*} +% %or, +% with a nested definition, +% $ \derivec \Big( \derivec \big( \derivec(R,ADC), read\_ADC \big), read\_4\_20\_input \Big). $ +% % +% This nested structure means that we have multiple traceable +% stages of failure mode reasoning in our analysis. Traditional FMEA would have only one stage +% of reasoning for each component failure mode. % \section{Heuristic Comments on {\ft} Input Circuit} @@ -995,37 +1037,40 @@ of reasoning for each component failure mode. %\clearpage \section{Conclusion} % +The FMMD method has been demonstrated, using the industry standard {\ft} +input circuit and software. +% The {\dc} representing the {\ft} reader -in software shows that by taking a modular approach for FMEA, we can integrate -software and electro-mechanical FMEA models. +shows that by taking a modular approach for FMEA, i.e. FMMD, we can integrate +software and electro-mechanical models. % With this analysis -we have a complete `reasoning~path' linking the failures modes from the +we have stages along the `reasoning~path' linking the failure modes from the electronics to those in the software. -Each functional group to {\dc} transition represents a +Each {\fg} to {\dc} transition represents a reasoning stage. % With traditional FMEA methods the reasoning~distance is large, because it stretches from the component failure mode to the top---or---system level failure. -For this reason applying traditional FMEA to software stretches -the reasoning distance even further. 
+%For this reason applying traditional FMEA to software stretches +%the reasoning distance even further. % -We now have a {\dc} for a {\ft} input in software. -Typically, more than one such input could be present in a real-world system. -Not only have we integrated electronics and software in an FMEA, we can also -re-use the analysis for each {\ft} input in the system. +We now have a {\dc} for a {\ft} input. % in software. +Typically, more than one such input could be present in a real-world system: we can thus +%Not only have we integrated electronics and software in an FMEA, we can also +re-use this analysis for each {\ft} input in the system. % -The unsolved symptoms, or unobservable errors, i.e. $VAL\_ERR$ could be addressed -by another software function to read other known signals -via the MUX (i.e. voltage references). This strategy would -detect ADC\_STUCK\_AT and MUX\_FAIL failure modes. +%The unsolved symptoms, or unobservable errors, i.e. $VAL\_ERR$ could be addressed +%by another software function to read other known signals +%via the MUX (i.e. voltage references). This strategy would +%detect ADC\_STUCK\_AT and MUX\_FAIL failure modes. % -Detailing this however, is beyond the scope %and page-count -of this paper. +%Detailing this however, is beyond the scope %and page-count +%of this paper. % -A software specification for a hardware interface will concentrate on -how to interpret raw readings, or what signals to apply for actuators. -Using FMMD we can determine an accurate failure model for the interface as well. +%A software specification for a hardware interface will concentrate on +%how to interpret raw readings, or what signals to apply for actuators. +Additionally, using FMMD we can determine a failure model for the hardware/software interface. % interface as well. %Its solved. Hoooo-ray !!!!!!!!!!!!!!!!!!!!!!!! @@ -1040,7 +1085,8 @@ Using FMMD we can determine an accurate failure model for the interface as well. 
% %\today % % { %\tiny % -\tiny +%\tiny +\footnotesize \bibliographystyle{plain} \bibliography{../../vmgbibliography,../../mybib} }