From d695dc8441b2154620b96a0a480db6d809a96d64 Mon Sep 17 00:00:00 2001 From: Robin Clark Date: Wed, 16 Mar 2011 18:45:01 +0000 Subject: [PATCH] evening edit (he or she is eating!) --- fmmd_design_aide/fmmd_design_aide.tex | 114 ++++++++++++++++++-------- introduction/introduction.tex | 2 +- logic_diagram/logic_diagram.tex | 2 +- mybib.bib | 9 +- 4 files changed, 89 insertions(+), 38 deletions(-) diff --git a/fmmd_design_aide/fmmd_design_aide.tex b/fmmd_design_aide/fmmd_design_aide.tex index 76697a3..68897b0 100644 --- a/fmmd_design_aide/fmmd_design_aide.tex +++ b/fmmd_design_aide/fmmd_design_aide.tex @@ -5,7 +5,18 @@ paper describes how the FMMD methodology can be used to refine safety critical designs and identify undetectable and dormant faults. % -Once undetecable faults or dormant faults are discovered +It uses an industry standard milli-volt amplifier +circuit, intended for reading thermocouples. +It has an inbuilt safety resistor which allows it +to detect the thermocouple becoming disconnected/going OPEN. +% +This circuit is analysed from an FMMD perspective and +two undetectable failure modes are identified. +A `safety check' circuit is then proposed and analysed. +This has no undetectable failure modes, but does have one +`dormant' failure mode. +% +This paper shows that once undetectable faults or dormant faults are discovered the design can be altered (or have a safety component added), and the FMMD analysis process re-applied. This can be an iterative process applied until the design has an acceptable level safety. % of dormant or undetectable failure modes. @@ -21,14 +32,26 @@ This chapter describes how the FMMD methodology can be used to examine safety critical designs and identify undetectable and dormant faults. % -Once undetecable faults or dormant faults are discovered +It uses an industry standard milli-volt amplifier +circuit, intended for reading thermocouples.
+It has an inbuilt safety resistor which allows it +to detect the thermocouple becoming disconnected/going OPEN. +% +This circuit is analysed from an FMMD perspective and +two undetectable failure modes are identified. +A `safety check' circuit is then proposed and analysed. +This has no undetectable failure modes, but does have one +`dormant' failure mode. +% +This chapter shows that once undetectable faults or dormant faults are discovered the design can be altered (or have a safety component added), and the FMMD analysis process re-applied. -This can be an iterative process which can be applied until the -design has an acceptable level of safety. % dormant or undetectable failure modes. +This can be an iterative process applied until the +design has an acceptable level of safety. % of dormant or undetectable failure modes. % Used in this way, its is a design aide, giving the user the possibility to refine/correct a {\dc} from the perspective of its failure mode behaviour. + } @@ -44,9 +67,18 @@ the failure mode behaviour is then viewed from the {\fg} perspective (i.e. as a symptoms of the {\fg}) and common symptoms are then collected. The final stage is to create a {\dc} which has the symptoms of the {\fg} -it was sourced from, as its failure modes. +it was sourced from, as its failure modes. +% +%From the failure mode behaviour of the {\fg} common symptoms are collected. +These common symptoms are % in effect +the failure mode behaviour of +the {\fg} viewed as an % single +entity, or a `black box' component. +% +From the analysis of the {\fg} we can create a {\dc}, where the failure modes +are the symptoms of the {\fg} we derived it from. } - +{ \paragraph{Overview of FMMD Methodology} To re-cap from chapter \ref{symptomex}, the principle of FMMD analysis is a five stage process, @@ -57,9 +89,6 @@ the failure mode behaviour is then viewed from the common symptoms are then collected.
The final stage is to create a {\dc} which has the symptoms of the {\fg} it was sourced from, as its failure modes. - -{ - % %From the failure mode behaviour of the {\fg} common symptoms are collected. These common symptoms are % in effect @@ -69,37 +98,51 @@ entity, or a `black box' component. % From the analysis of the {\fg} we can create a {\dc}, where the failure modes are the symptoms of the {\fg} we derived it from. +} % -\paragraph{detectable and undetectable failure modes} +\paragraph{Undetectable failure modes.} The symptoms will be detectable (like a value out of range) or undetectable (like a logic state or value being incorrect). -The `undetectable' failure modes undertsandably, are the most worrying for the safety critical designer. -EN61058, the statistically based European Norm, using ratios +The `undetectable' failure modes, understandably, are the most worrying for the safety critical designer. +EN61508~\cite{en61508}, the statistically based failure mode European Norm, uses ratios of detected and undetected system failure modes to -classify the sytems safety levels and describes sub-clasifications -for detected and undetected failure modes~\cite{en61508}. +classify the system's safety levels and describes sub-classifications +for detected and undetected failure modes. +%\gloss{DU} +%\gloss{DD} %It is these that are, generally the ones that stand out as single %failure modes. For instance, out of range values, are easy to detect by systems using the {\dc} supplying them. -Undetectable faults are ones that forward incorrect information -where we have no way of validating or testing it. +Undetectable faults are ones that supply incorrect information or states +where we have no way of knowing whether they are correct or not. % we know we can cope with; they %are an obvious error condition that will be detected by any modules %using the {\dc}.
% -An undetecable failure mode can introduce serious +Undetectable failure modes can introduce serious errors into a SYSTEM. -\paragraph{dormant faults} A dormant fault is one -which can manifest its-self in conjuction with +\paragraph{Dormant faults.} +A dormant fault is one +which can manifest itself in conjunction with another failure mode becoming active, or an environmental -condition changing (for instance temperature). Some +condition changing (for instance temperature). +Some component failure modes may lead to dormant failure modes. -By examining test cases from a functional group against all +For instance a transistor failing OPEN when it is meant +to be in an OFF state would be a dormant fault. +Even though the fault is active, the transistor +is, for the time being, behaving correctly. +% +If we examine the circuit from both operational states, +i.e. when the transistor is meant to be ON and when it is meant to be OFF, +we can determine all the consequences of that particular failure. +% +More generally, by examining test cases from a functional group against all operational states and germane environmental conditions we can determine all the failure modes of the {\fg}. @@ -143,7 +186,7 @@ We then analsye the {\fg} and the resultant {\dc} failure modes/symptoms are dis This circuit amplifies a milli-volt input by a gain of $\approx$ 184 ($\frac{150E3}{820}+1$) \footnote{The resistors used to program the gain of the op-amp would typically be of a $ \le 1\%$ guaranteed -tolerance. In practise, the small variations would be corrected with software constants prorgammed during production +tolerance. In practice, the small variations would be corrected with software constants programmed during production test/calibration.}. An offset is applied to the input by R18 and R22 forming a potential divider of $\frac{820}{2.2E6+820}$. With 5V applied as Vcc this gives an input offset of $1.86\,mV$. @@ -153,7 +196,7 @@ So the amplified offset is $\approx 342 \, mV$.
We can determine the output of the amplifier by subtracting this amount from the reading. We can also define an acceptable range for the readings. This would depend on the characteristics of milli-volt source, and also on the -thresholds of the volatges considered out of range. For the sake of example let us +thresholds of the voltages considered out of range. For the sake of example let us consider this to be a type K thermocouple amplifier, with a range of temperatures expected to be within {{0}\oc} and {{300}\oc}. @@ -162,8 +205,8 @@ Choosing the common Nickel-Chromium v. Nickel Aluminium `K' type thermocouple, {{0}\oc} provides an EMF of 0mV, and {{300}\oc} 12.207. Multiplying these by 184 and adding the 1.86mV offset gives 342.24mV and 2563.12mV. This is now in a suitable range to be read by -an analogue didtital converter, which will have a voltage span -typically between 3.3V and 5V on modern microcontrollers/ADC (Analogue Digital Converter) chips. +an analogue digital converter, which will have a voltage span +typically between 3.3V and 5V on modern micro-controllers/ADC (Analogue Digital Converter) chips. Note that this also leaves a margin or error on both sides of the range. If the thermocouple were to become colder than {{0}\oc} it would supply a negative voltage, which would subtract from the offset. @@ -242,7 +285,7 @@ this may be unacceptable. We will need to add some type of detection mechanism to the circuit to test $R_{off}$ periodically. For instance were we to check $R_off$ every $\tau = 20mS$ work out detection -allowance according to EN61508. +allowance according to EN61508~\cite{en61508}. \section{Proposed Checking Method} @@ -320,13 +363,13 @@ and the reading is assumed to be valid. \hline \hline %% OK TR1 OFF , and so 36 in series. 
R36 has shorted so -$\overline{TEST\_LINE}$ ON & TC:1 $R36$ SHORT & No added resistance & NO TEST EFFECT & XX 1.38 \\ \hline +$\overline{TEST\_LINE}$ ON & TC:1 $R36$ SHORT & No added resistance & NO TEST EFFECT & 1.38 \\ \hline %% -$\overline{TEST\_LINE}$ OFF & TC:1 $R36$ SHORT & dormant failure & NO SYMPTOM & XX 1.38 \\ \hline +$\overline{TEST\_LINE}$ OFF & TC:1 $R36$ SHORT & dormant failure & NO SYMPTOM & 1.38 \\ \hline %% here TR1 should be OFF, as R36 is open we now have an open circuit -$\overline{TEST\_LINE}$ ON & TC:2 $R36$ OPEN & open circuit & OPEN CIRCUIT & XX 12.42\\ \hline +$\overline{TEST\_LINE}$ ON & TC:2 $R36$ OPEN & open circuit & OPEN CIRCUIT & 12.42\\ \hline %% here TR1 should be ON and R36 by-passed, the fact it has gone OPEN means no symptom here, a dormant failure. -$\overline{TEST\_LINE}$ OFF & TC:2 $R36$ OPEN & dormant failure & NO SYMPTOM & XX 12.42\\ \hline +$\overline{TEST\_LINE}$ OFF & TC:2 $R36$ OPEN & dormant failure & NO SYMPTOM & 12.42\\ \hline \hline % %% TR1 OFF so R36 should be in series. Because TR1 is ON because it is faulty, R36 is not in series @@ -336,10 +379,10 @@ $\overline{TEST\_LINE}$ LINE ON & TC:3 $TR1$ ALWAYS ON & No added resista $\overline{TEST\_LINE}$ OFF & TC:3 $TR1$ ALWAYS ON & dormant failure & NO SYMPTOM & XX 1.38 \\ \hline %% %% TR1 should be off as overline{TEST\_LINE}$ is ON. As TR1 is faulty it is always off and we have a dormant failure. -$\overline{TEST\_LINE}$ LINE ON & TC:4 $TR1$ ALWAYS OFF & dormant failure & NO SYMPTOM & XX 1.38 \\ \hline +$\overline{TEST\_LINE}$ LINE ON & TC:4 $TR1$ ALWAYS OFF & dormant failure & NO SYMPTOM & 1.38 \\ \hline %% %% TR1 should be ON, but is off due to TR1 failure. 
The resistance R36 will always be in series therefore -$\overline{TEST\_LINE}$ OFF & TC:4 $TR1$ ALWAYS OFF & resistance always added & NO TEST EFFECT & XX 1.38 \\ \hline +$\overline{TEST\_LINE}$ OFF & TC:4 $TR1$ ALWAYS OFF & resistance always added & NO TEST EFFECT & 1.38 \\ \hline \hline \end{tabular} \label{tab:testaddition} @@ -543,7 +586,7 @@ The meanings of and values assigned to its co-efficients are described in table %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Taking these parameters and applying equation \ref{microcircuitfit}, -$$ 0.04 \times1.4 \times0.0026 \times2.0 \times2.0 \times1.0 = .0005824 $$ +$$ 0.04 \times 1.4 \times 0.0026 \times 2.0 \times 2.0 \times 1.0 = .0005824 $$ we get a value of $0.0005824 \times {10}^6$ failures per hour. This is a worst case FIT\footnote{where FIT (Failure in Time) is defined as failures per Billion (${10}^9$) hours of operation} of 1. @@ -551,7 +594,8 @@ failures per Billion (${10}^9$) hours of operation} of 1. \subsection{Switching Transistor} The switching transistor will be operating at a low frequency -and well within 50\% of its maximum voltage. +and well within 50\% of its maximum voltage. We can also assume a benign +temperature environment of $ < 60^{o}C$. MIL-HDBK-217F\cite{mil1992}[6-25] gives an exmaple transistor in these environmental conditions, and assigns an FIT value of 11. @@ -572,7 +616,7 @@ but statistically less reliable. \paragraph{Practical side effect of checking for thermocouple disconnection} Because the potential divider provides an offset as a side effect of detecting a disconnection -resistance in the thermocopule extension or compensation cable will have an effect. +resistance in the thermocouple extension or compensation cable will have an effect. For a `k' type thermocouple this would be of the order of $0.5 { }^{o}C$ for $10\Omega$ of cable loop impedance. 
Therefore, accuracy constraints and cable impedance should be considered to determine specified maximum compensation/extension lengths. diff --git a/introduction/introduction.tex b/introduction/introduction.tex index 0e710b6..14a0e18 100644 --- a/introduction/introduction.tex +++ b/introduction/introduction.tex @@ -636,7 +636,7 @@ A technique of modularising, or breaking down the problem is clearly necessary. One question that anyone developing a safety critical analysis design tool could do well to answer, is how the methodology would cope with known previous disasters. -The Challenger disaster is a good example, and was well documented and investigated. +The Challenger disaster is a good example, and was well documented and investigated~\cite{challenger}. The problem lay in a seal that had an operating temperature range. On the day of the launch the temperature of this seal was out of range. diff --git a/logic_diagram/logic_diagram.tex b/logic_diagram/logic_diagram.tex index 3b3e99d..e63ad49 100644 --- a/logic_diagram/logic_diagram.tex +++ b/logic_diagram/logic_diagram.tex @@ -741,7 +741,7 @@ Contour $C$ is \textbf{enclosed} by contour $A$. This says that for failure~mode $C$ to occur failure mode $A$ must have occurred. A famous example of this is the space shuttle `O' ring failure that -caused the 1986 Challenger disaster\cite{wdycwopt}. +caused the 1986 Challenger disaster~\cite{challenger}~\cite{wdycwopt}. For the failure mode to occur, the ambient temperature had to be below a critical value. If we take the failure mode of the `O' ring to be $C$ diff --git a/mybib.bib b/mybib.bib index ce5d325..fe64b01 100644 --- a/mybib.bib +++ b/mybib.bib @@ -305,6 +305,13 @@ year = "1994" } +@MISC{challenger, + author = "U.S. 
Presidential Commission", + title = "Report of the Space Shuttle Challenger Accident", + howpublished = "Available from http://science.ksc.nasa.gov/shuttle/missions/51-l/docs/rogers-commission/table-of-contents.html", + year = "1986" +} + @MISC{en61508, author = "E N Standard", title = "EN61508:2002 Functional safety of electrical/electronic/programmable electronic safety related systems", @@ -425,7 +432,7 @@ OPTissn = {}, @TechReport{eurothermtables, - author = {Alan Brown}, + author = {Eurotherm Ltd.}, title = {Thermocouple Emf TABLES and PLATINUM 100 RESISTANCE THERMOMETER TABLES}, institution = {Eurotherm, UK}, year = {1973},