From 80e47170bdc41424a57f20e475b7f030de10f8e3 Mon Sep 17 00:00:00 2001 From: "Robin P. Clark" Date: Wed, 1 Apr 2015 11:17:54 +0100 Subject: [PATCH] morning edit --- mybib.bib | 7 +- papers/JOURNAL_fmea_sw_hw/sw_hw_fmea.tex | 224 ++++++++++++++++++----- 2 files changed, 183 insertions(+), 48 deletions(-) diff --git a/mybib.bib b/mybib.bib index b1adfaa..ffbc67d 100644 --- a/mybib.bib +++ b/mybib.bib @@ -1241,7 +1241,12 @@ ISSN={0098-5589},} howpublished = "British standards Institution http://www.bsigroup.com/", year = "2002" } - + @MISC{en61511, + author = "E N Standard", + title = "Functional safety of electrical/electronic/ programmable electronic safety-related systems. Requirements for electrical/electronic/ programmable electronic safety-related systems ", + howpublished = "British standards Institution http://www.bsigroup.com/", + year = "2010" + } @Manual{lm358, title = {Datasheet: Low-Power dual operation amplifiers LM158,LM258,LM358: Doc ID 2163 Rev 10}, diff --git a/papers/JOURNAL_fmea_sw_hw/sw_hw_fmea.tex b/papers/JOURNAL_fmea_sw_hw/sw_hw_fmea.tex index 9327a3b..c9c65f4 100644 --- a/papers/JOURNAL_fmea_sw_hw/sw_hw_fmea.tex +++ b/papers/JOURNAL_fmea_sw_hw/sw_hw_fmea.tex @@ -27,7 +27,11 @@ %\newboolean{paper} %\setboolean{paper}{true} % boolvar=true or false \newcommand{\derivec}{{D}} -\newcommand{\ft}{\ensuremath{4\!\!\rightarrow\!\!20mA} } + +%\newcommand{\fti}{{ \ensuremath{4\mA \; \rightarrow \; 20mA} }} +\newcommand{\fti}{4mA~to~20mA} +\newcommand{\ftt}{FTTI} + \newcommand{\permil}{\ensuremath{{ }^0/_{00}}} \newcommand{\oc}{\ensuremath{^{o}{C}}} \newcommand{\adctw}{{${\mathcal{ADC}}_{12}$}} @@ -59,6 +63,11 @@ failure mode of the component or sub-system}}} \newcommand{\pecgloss}{\glossary{name={PEC},description={A Programmable Electronic controller, will typically consist of sensors and actuators interfaced electronically, with some firmware/software component in overall control}}} \newcommand{\bcfm}{base~component~failure~mode} 
\newcommand{\cf}[1]{\textbf{#1()}} +\newcommand{\swhw}{software~hardware} +\newcommand{\sw}{software} +\newcommand{\hw}{hardware} +\newcommand{\uP}{micro~processor} + \def\layersep{1.8cm} @@ -174,21 +183,26 @@ integrated electronics/software system. FMEA stands for Failure Mode Effects Analysis. % -All components used to build a system can fail. -They may fail in more than one way. -The ways in which a component can fail, are known as its failure modes. +All components used to build a system can fail, and +they may fail in more than one way. +The ways in which a component can fail, are known as its {\fms}. -At its simplest FMEA means taking taking a failure mode of a component and predicting +At its simplest FMEA means taking a {\fm} of a component and predicting what problems it may cause for the system it is part of. % -One way the electronic component the resistor can fail for instance, is it +One way the electronic component the resistor can fail for instance, is if it were to go open circuit. It could be because it was not soldered on properly and fell off, -it could have had an internal mechanical fault or it could be burnt off by too much +it could have had an internal mechanical fault or it could have been destroyed/burnt~off by too much electrical current. The cause does not matter. The fact that it can fail by going open circuit does. % -This then is one of the failure modes of a resistor, $OPEN$. +This then is one of the {\fms} of a resistor, $OPEN$. % For instance, an FMEA scenario could be a resistor in a system going $OPEN$. % circuit. +% +The investigator examines the electrical circuit with the resistor +in it, and using logic and reasoning, works out how the circuit would react +with that resistor failing $OPEN$. 
+ % If the resistor was part of an amplifier in the circuit it could be predicted say, that a particular reading, @@ -206,7 +220,7 @@ The central concept of FMEA is that if all component failures are known, by analysing them the failure behaviour of a system can be determined. % This means looking at every component in the system, and for each of those components -examining all known failure modes in the context of the system that it is in. +examining all known failure modes in the context of the system that it is part of. % Various handbooks and international standards list common components and their know failure modes, often with accompanying statistics~\cite{en298, fmd91, mil1991}. @@ -361,18 +375,23 @@ This report is the end product of an FMEA investigation. % -% Several variants of FMEA exist, -% but the three in main use are: -% \begin{itemize} -% \item Deisgn FMEA (DFMEA) is FMEA applied at the design or approvals stage~\cite{en298, en230} -% where the aim is to ensure that single component failures (at least) cannot -% cause unacceptable system level events~\cite{~\cite{iec60812}fmea}, -% \item Failure Mode Effect Criticality Analysis (FMECA) is applied to determine the most potentially dangerous or damaging -% failure modes to fix, using FMEA in conjunction with severity and failure probability figures~\cite{fmeca,mil1991,fmd91}, -% \item Failure Mode Effects and Diagnostics Analysis, is FMEA peformed to -% determine a statistical level of safety. -% This is associated with Safety Integrity Levels (SIL)~\cite{en61508}~\cite{en61511} classification. 
-% \end{itemize} +Several variants of FMEA exist, +but the three in main use are: + +\begin{itemize} + + \item Design FMEA (DFMEA) is FMEA applied at the design or approvals stage~\cite{en298, en230} +where the aim is to ensure that single component failures (at least) cannot +cause unacceptable system level events~\cite{iec60812,boffin}, + + \item Failure Mode Effect Criticality Analysis (FMECA) is applied to determine the most potentially dangerous or damaging +failure modes to fix, using FMEA in conjunction with severity and failure probability figures~\cite{fmeca,mil1991,fmd91}, + + \item Failure Mode Effects and Diagnostics Analysis, is FMEA performed to +determine a statistical level of safety. This is a fairly standard FMEA but with statistical values attached to each component {\fm}; +this is associated with the European standard EN61508~\cite{en61508} and is commonly termed Safety Integrity Level (SIL)~\cite{en61511} classification. + +\end{itemize} \subsection{Concept of `reasoning~distance'.} @@ -617,7 +636,7 @@ system. \section{FMEA and modularity.} Because modern electronics has become more complex the number -of basic components in atypical safety critical system has risen dramatically. +of basic components in a typical safety critical system has risen dramatically. % % @@ -654,8 +673,6 @@ A miss-spelled variable could cause chaos. Also it was often difficult to pull a function out of one program and place it in another if it used some of the global variables. - - Newer computer languages were invented where modularity was encouraged. Instead of FORTRANs global scope for variables, individual functions in a newer languages like `C' started to have `local' variables. This meant that @@ -667,20 +684,19 @@ which grouped functions and data together into modules called classes, where even the internal local variables of a class could be hidden from the programmer using the class. 
For instance the internal workings of a binary~tree or linked~list do no need to be accessed if you simply want to use a class in your program to store data: in this case -your would pick a ready written and well de-bugged data store class and simply use it. +the programmer would pick a ready written and well de-bugged data store class and simply use it. % Software expanded in complexity faster than electronics, and to cope with this software languages developed modularity (function call trees, classes and finally distributed processing mechanisms). % FMEA has, by necessity, started to include some modular features but none yet -have defined mechanisms for ensuring that all failure modes -from a module must be considered in the analysis of the module(s) -that incorporate it. +have defined mechanisms for ensuring that all component failure modes +are traceable from component to system level. % in the analysis of the module(s) that incorporate it. -\paragraph{Modularisation in safey analysis in the automotive industry.} +\paragraph{Modularisation in safety analysis in the automotive industry.} The automotive industry, because of mass production, must make products that have high safety integrity %that are very safe but % financial pressure keeps their products @@ -735,6 +751,33 @@ this can introduce errors into the reliability calculations~\cite{MILSTD1629shor and miss-out some component failure modes. % +\paragraph{Integrated Circuits (ICs)} + +Consider some commonly used ICs; an op-amp +is a good example. +% +An op-amp will have a high internal component count. +It is mainly a collection of transistors on a chip +and is a complex circuit designed to give a very high and precise gain. +%These are made from several components including +%transistors, resistors, capacitors etc. +In order to perform FMEA op-amps are given +failure modes in the literature~\cite{fmd91, mil1991} +as though they are simple base components. +% +This is a form of modularisation. 
+% +%It is assumed that with experience and analysis +%the op-amp failure modes were compiled. +% +This has effectively become a precedent %starting point +for modularisation of FMEA. % by stealth! +% +%It also sets . +If it is acceptable to model a complex IC as a component, assigning it a set of failure modes, +it should be possible to analyse sections of a larger circuit +and treat those sections as components in their own right. + % % \paragraph{Top Down or Bottom-up?} % % Because FMEA is a bottom up technique, applying a top down analysis (as in FMECAs indenture levels) @@ -956,7 +999,7 @@ It also means that a function~group can contain other functional~groups without dragging along the semantic baggage that comes with the terms `module' and 'sub-system'. -\section{The proposed Methodology} +\section{The proposed Methodology: description} \label{fmmdproc} % %% One line @@ -983,8 +1026,62 @@ bottom-level component failure modes would be handled/used. % Starting at the bottom means having to deal with each component failure mode from the beginning. +\section{The proposed Methodology: quick guide or `how~to'.} -\paragraph{FMMD process.} +An FMEA typically begins with a parts list and then from that a series +of entries for each component failure mode. +Often these will be listed in the order they are found +on the parts list. + +With FMMD a different approach is taken. +The Engineer will examine the circuit schematic and look for {\fgs}. +That is small collections of components that work together +to perform a function. +Once the circuit has been analysed so that all components +have been collected into a {\fg} the first stage of analysis begins. +% +Each {\fg} is analysed and its symptoms of failure are listed. +It is then treated as a {\dc} and given a name. 
+% +Where there are repeated sections of circuitry these +could share the same name but take an index number (for instance +were a circuit to contain several {\fti} inputs +they could be named ${\ftt}_1$, ${\ftt}_2$ etc.). +This also means the analysis of the {\fti} circuit {\fg} need only be performed once. +% +This gives the first stage of {\dcs}. + +These {\dcs} are now treated as components and used to form {\fgs}. +Eventually a hierarchy will be built until the whole +system is included. The top level failure symptoms are the ways in which the system can fail. + +An advantage of this is that all component failure modes must be considered +in terms of their effects as the system goes from the +lower levels through to more abstract system level failures. +This can lead to surprises. Often when a system is evaluated +by FMMD a list of system level failures can include ones +that are not currently dealt with or even detectable +without some re-design. Having surprises at the design~stage +and not in~the~field is a very good thing +when dealing with safety critical systems! + + +Because the ways in which a software function can fail can be listed +it too can be treated as an FMMD {\fg}. +%Software functions are treated as components as well, and +%treat the hardware they interface to (if any) as components. +A software function's `components' are the software functions it calls +and the hardware elements it interfaces to (if any, +but eventually +all software hierarchies reach down to hardware, or they would not do anything in the real world). + +An example of a hardware low level analysis is given in~\cite{syssafe2011} and a combined +software hardware sub-system in~\cite{syssafe2012}. Examples of both, including analysis of performance +can be found in~\cite{clark}. + +FMMD is described in more detail in the section below. 
+ +\paragraph{FMMD process description} To ensure all component failure modes are modelled and traceable through stages of analysis, the new methodology must be bottom-up. % @@ -1094,6 +1191,7 @@ For electrical and mechanical systems, although the original system designers concepts of modularity and sub-systems in design may provide guidance, applying FMMD means deciding on the members for {\fgs} and the subsequent hierarchy. + % \section{Example for analysis} % : How can we apply FMEA} % % @@ -1150,6 +1248,8 @@ A structured analysis `Yourdon' context diagram~\cite{Yourdon:1989:MSA:62004} is A Yourdon context diagram shows an overview of a system, with the data inputs and data outputs. The circle in the middle defines the processing applied to those inputs and outputs. The context diagram can be later refined by introducing more circles with data paths between them. +Finally a {\swhw} hierarchy can be derived from a Yourdon diagram, which assists +in the design of hybrid {\swhw} systems. % \begin{figure}[h]+ @@ -1286,8 +1386,20 @@ that will be used in the temperature controller are now defined. These are listed, and from the bottom-up, FMMD analysis is begun. % \clearpage -\subsection{FMMD Analysis of PID temperature Controller} +%\subsection{FMMD Analysis of PID temperature Controller} % + +% +\subsection{Temperature Controller Hardware Elements FMMD.} +% + +The hardware elements of this project have been analysed using FMMD +and can be found in~\cite{clark, syssafe2011, syssafe2012}. +Being able to re-use analysis work is another advantage of using modularised FMEA. +Even complex constructs such as +circuitry connected to a {\uP} which reads voltages +into {\sw} functions can be picked up from one project and simply re-used in another. 
+ To summarise from the design stage, the electronic components identified thus far: \begin{itemize} @@ -1300,16 +1412,14 @@ the electronic components identified thus far: \item LEDs --- Indication LEDs via GPIO, \item micro-controller --- the medium for running the software. \end{itemize} -% -\subsection{Temperature Controller Hardware Elements FMMD.} -% - -NEED BETTER REFS HERE FOR THE -SOURCES FOR THE FAILURE MODES OF COMPONENTS> +Each electronic {\dc} will be described and cited in more detail below. \paragraph{ADCMUX and Read\_ADC.} -We re-use the {\dc} from section~\ref{readADC}. +The {\dc} from~\cite{syssafe2012} is re-used for this analysis. %section~\ref{readADC}. +This analysis was performed on a `C' function which +read a value from an analogue to digital converter (ADC) hardware element. +The analysis revealed that it could fail in three ways. $$ fm(RADC) = \{ VV\_ERR, HIGH, LOW \} .$$ % % @@ -1336,7 +1446,10 @@ It therefore has the same failure modes as a resistor: $$fm(HEATER) = \{ OPEN, SHORT \} .$$ % \paragraph{Pt100 Platinum Temperature Sensor.} -The Pt100 four wire configuration was analysed in section~\ref{sec:Pt100}, the {\dc} is re-used here: + +The four wire Pt100 configuration is commonly used in safety critical designs. +For single failure analysis this circuit has only one failure mode. +The Pt100 four wire configuration was analysed in~\cite[section~5.6]{clark}; the {\dc} is re-used here: $$ fm(Pt100) = \{ OUT\_OF\_RANGE \} . $$ % % @@ -1689,16 +1802,31 @@ as an Euler diagram in figure~\ref{fig:euler_temp_controller}. The PID temperature control example above, shows that complete hybrid software/electronic systems can be modelled using FMMD. % -This analysis has revealed system level failure modes that are un-handled and some that are undetectable. The FMMD model can be traversed from undesirable top level failures to the {\bc} {\fms} that are the causes. 
%\fmmdglossOBS +%% +This analysis has revealed system level failure modes that are un-handled and some that are undetectable. +% +While this may appear poor, with FMMD the undetectable and un-handled failures are actually known: they +are present in the model because they came from the components' {\fms}. % This means that by using FMMD, the sub-systems which require re-design to eliminate or reduce the likelihood of undetectable failure modes can be identified. % +Each system {\fm} of concern can be traced back to the components that caused it. +% +The components can be strengthened or additional self diagnostics can be applied to +alleviate the problems. +% The demands of EN61508~\cite{en61508} for minimum safe failure fraction thresholds~\cite{scsh}[p.52] associated with SIL levels, make this a desirable feature of any FMEA based methodology. % +This is because the system {\fms} can be traced back to component {\fms} which +should have published reliability statistics~\cite{fmd91}. +% +With the reliability statistics the SIL dangerous failure probabilities can be listed and summed +providing data to classify the SIL level. +% For the failure modes caused by electronics, reliability statistics can be applied, and the possibilities of using higher rated components instead of potentially expensive re-design can be simulated/modelled. @@ -1728,19 +1856,21 @@ and ram complement checking can be applied. \section{Conclusion} -Effeciency --- the $O(N^2)$ has been broken down by making it -several much easier to deal with $O(n^2)$ analyis stages. +%% NEED TO LIST THE WISH LIST HERE AND ANSWER ALL POINTS -While there are no FMEA metrics to compare a sw hw hybrid +Efficiency --- the $O(N^2)$ has been broken down by making it +several much easier to deal with $O(n^2)$ analysis stages. + +While there are no FMEA metrics to compare a {\swhw} hybrid using FMMD an estimate of the work to perform, the reasoning distance, can be calculated. 
-hw sw interface is handled naturally. Any hw failures +The {\swhw} interface is handled naturally. Any {\hw} failures can now no longer be missed or forgotten in the analysis process. -The sw faces no suprise hw errors that it has no sensible +The {\sw} faces no surprise {\hw} errors that it has no sensible way of dealing with. -Errors introduced by the uP are unresolved in this example. But they are listed. +Errors introduced by the {\uP} are unresolved in this example. But they are listed. Re-useability --- the electronics --- the Pt100 --- s/w functions to read ADC values