diff --git a/mybib.bib b/mybib.bib index 2691f77..e270a04 100644 --- a/mybib.bib +++ b/mybib.bib @@ -1,3 +1,11 @@ +@INPROCEEDINGS{probfmea_4338247, +author={Grunske, Lars and Colvin, R. and Winter, K.}, +booktitle={Quantitative Evaluation of Systems, 2007. QEST 2007. Fourth International Conference on the}, title={Probabilistic Model-Checking Support for FMEA}, +year={2007}, month=sep, +pages={119--128}, +keywords={fault diagnosis;formal verification;probability;system monitoring;effect analysis;failure mode;probabilistic fault injection;probabilistic model checking;Accidents;Australia;Cause effect analysis;Failure analysis;Hazards;Information analysis;Information technology;Probability;Risk analysis;Safety;Failure Mode;Probabilistic Model Checking;System Safety;and Effect Analysis}, +doi={10.1109/QEST.2007.18},} + @article{parnas1991assessment, title={Assessment of safety-critical software in nuclear power plants.}, @@ -830,7 +838,31 @@ strength of materials, the causes of boiler explosions", year = 2009 } +@inproceedings{Bishop:2010:ONT:1886301.1886325, + author = {Bishop, Peter and Cyra, Lukasz}, + title = {Overcoming non-determinism in testing smart devices: a case study}, + booktitle = {Proceedings of the 29th international conference on Computer safety, reliability, and security}, + series = {SAFECOMP'10}, + year = {2010}, + isbn = {3-642-15650-9, 978-3-642-15650-2}, + location = {Vienna, Austria}, + pages = {237--250}, + numpages = {14}, + url = {http://dl.acm.org/citation.cfm?id=1886301.1886325}, + acmid = {1886325}, + publisher = {Springer-Verlag}, + address = {Berlin, Heidelberg}, + keywords = {non-determinism, safety, smart instruments, testing}, +} +@article{smartinstruments, + title="Smart instruments in safety instrumented systems: http://www.isa.org/InTechTemplate.cfm?template=/ContentManagement/ContentDisplay.cfm$\&$ContentID=77994", + author={Nobes, Tom S.}, + year={2009},
biburl="http://www.isa.org/InTechTemplate.cfm?template=/ContentManagement/ContentDisplay.cfm\&ContentID=77994", + } + + @PHDTHESIS{garrett, AUTHOR = "Chris Garrett", TITLE = "Functional diagnosis strategies for analog systems using heuristic programming techniques", diff --git a/related_papers_books/probablistic_modelchecking_support_FMEA_04338247.pdf b/related_papers_books/probablistic_modelchecking_support_FMEA_04338247.pdf new file mode 100644 index 0000000..9c45fb8 Binary files /dev/null and b/related_papers_books/probablistic_modelchecking_support_FMEA_04338247.pdf differ diff --git a/submission_thesis/CH2_FMEA/copy.tex b/submission_thesis/CH2_FMEA/copy.tex index 28be3c8..ae82a11 100644 --- a/submission_thesis/CH2_FMEA/copy.tex +++ b/submission_thesis/CH2_FMEA/copy.tex @@ -194,9 +194,12 @@ only requires that the failure mode OPEN be considered for FMEA analysis. % For resistor types not specifically listed in EN298, the failure modes are considered to be either OPEN or SHORT. +% The reason that parameter change is not considered for resistors chosen for an EN298 compliant system, is that they must be must be {\em downrated}. That is to say the power and voltage ratings of components must be calculated -for maximum possible exposure, with a 40\% margin of error. This drastically reduces the probability +for maximum possible exposure, with a 40\% margin of error. +% +This drastically reduces the probability that the resistors will be overloaded, and thus subject to drift/parameter change. @@ -567,8 +570,12 @@ we can hop from module to module eliminating working modules, until we find the failure. -FMEA is a theoretical discipline. It would be very unusual to build a circuit and then simulate -component failure modes. This would be very time consuming as it would involve building a circuit for each component {\fm} in the system. +FMEA is a theoretical discipline. +% +It would be very unusual to build a circuit and then simulate +component failure modes. 
+% +This would be time consuming as it would involve building a circuit for each component {\fm} in the system. % We cannot, as with fault finding, verify modules along the signal path for correct behaviour and eliminate them from the investigation. @@ -580,7 +587,7 @@ Too much and the task becomes impossible due to time/labour constraints. Too little and the analysis could become meaningless because it misses potential system failures. % -For a more complete analysis we should perhaps, examine each component {\fm} along the complete signal path, +For a more complete analysis we should perhaps examine each component {\fm} along the complete signal path, forwards and backwards from the placement of the component exhibiting the {\fm} under investigation. % @@ -603,11 +610,25 @@ The concept of the unacceptability of a single component failure causing a syste is an important and easily understood measurement of safety. % It is easy to calculate -because we can usually find Mean Time to Failure (MTTF) statistics for commonly used components. +because we can usually find Mean Time to Failure (MTTF) statistics~\cite{fmd91,mil1991} for commonly used components. % Also, used in the design phase of a project, FMEA is a useful tool for discovering potential failure scenarios~\cite{1778436820050601}. % +From a whole system perspective, we may find that {\bc} {\fms} +may have more than one possible system event associated with them. +Often there will be a clear one to one mapping, but +probabilities to failure (as used in FMECA) +could mean one to many.% mapping. +% +We could represent a failure mode and its possible outcomes using a Markov chain~\cite{probfmea_4338247}. +% +Where multiple simultaneous\footnote{Multiple simultaneous failures are taken to mean failures that occur within the same detection period.} +failure modes are considered this complicates +the statistical nature of the Markov chain, cause effect model. 
+% +What we in fact get is the merging, or local interaction of two Markov chains +for our cause effect model. % Subject Object Wiki answers : Best Answer %It is not grammar or vocabulary. It is a philosophical reference. %The dichotomy is the surrounding view of self that we act out of. It is often learned with language and not taught [like the alphabet and numbers are taught] in early life through language and the forming of distinctions. @@ -628,17 +649,28 @@ the subjective sense can we determine its meaning and/or severity. It is worth remembering that failure mode analysis performed on the leaks possible from the O ring on the space shuttle did not link this failure to the catastrophic failure of the spacecraft~\cite{challenger,sanjeev}. +% This was not a failure in the objective reasoning, but more of the subjective, or the context in which the leak occurred. % +What this means is that for an objectively calculated failure mode outcome, we may have +more than one subjective outcome definition for it. + +\paragraph{Multiple Simultaneous Failure Modes} +% FMEA is less useful for determining events for multiple -simultaneous\footnote{Multiple simultaneous failures are taken to mean failures that occur within the same detection period.} -failures. +simultaneous +failures\footnote{Multiple simultaneous failures are taken to mean failures that occur within the same detection period.}. % Work has been performed using component failure statistics to offer the more likely multiple failures~\cite{FMEAmultiple653556} for analysis. % +We now compound the multiple symptoms from one {\bc} {\fm} possibility +with the merging of Markov chains. % -This is because with the additional complication of having to change between these two modes of thinking, it becomes more difficult to +So for multiple failures we have the objective criteria complicated, and the subjective +adds another layer of complication. 
+% +Also with the additional complication of having to change between these two modes of thinking, it becomes more difficult to get a balance between subjective and objective perspectives. %subjective/objective become more cluttered when there are multiple possibilities @@ -668,7 +700,7 @@ Modern electronic components, are generally very reliable, and the systems built are thus very reliable too. Reliable field data on failures will, therefore be sparse. Should we wish to prove a continuous demand system for say ${10}^{-7}$ failures\footnote{${10}^{-7}$ failures per hour of operation is the threshold for S.I.L. 3 reliability~\cite{en61508}. Failure rates are normally measured per $10^9$ hours of operation -and are know as Failure in Time (FIT) values. The maximum FIT values for a SIL 3 system is therefore 100.} +and are known as Failure in Time (FIT) values. The maximum FIT value for a SIL 3 system is therefore 100.} per hour of operation, even with 1000 correctly monitored units in the field we could only expect one failure per ten thousand hours (a little over one a year). It would be utterly impractical to get statistically significant data for equipment @@ -725,8 +757,10 @@ methodologies. FMEA for a safety critical certification~\cite{en298,en61508} will have to be applied to all known failure modes of all components within a system. +% FMEA does not define or specify the scope of the investigation of each component failure mode. Should we follow the signal path, and all components we encounter along that, or should the scope be wider? +% If we were to examine the effect of a component {\fm} against all other components in a system, this could be said to be exhaustive analysis. @@ -779,7 +813,7 @@ we rely on experts in the system under investigation to perform a meaningful FMEA analysis.
% In practise these experts have to select the areas they see as most critical for detailed FMEA analysis: -its is usually impossible to perform a detail level of analysis on all component {\fms} +it is usually impossible to perform a detailed level of analysis on all component {\fms} on anything but a non-trivial system. \subsection{Component Tolerance} @@ -787,6 +821,7 @@ on anything but a non-trivial system. Component tolerances may need considered when determining if a component has failed. Calculations for acceptable ranges to determine failure or acceptable conditions must be made where appropriate. +% An example of component tolerance considered for FMEA is given in section~\ref{sec:resistortolerance}. @@ -795,7 +830,7 @@ is given in section~\ref{sec:resistortolerance}. \paragraph{Five main Variants of FMEA} \begin{itemize} \item \textbf{PFMEA - Production} Emphasis on cost reduction and product improvement; - \item \textbf{FMECA - Criticality} Emphasis on minimising the effect of critial systems failing; % Military/Space + \item \textbf{FMECA - Criticality} Emphasis on minimising the effect of critical systems failing; % Military/Space \item \textbf{FMEDA - Statistical safety} Statistical analysis giving Safety Integrity Levels; \item \textbf{DFMEA - Design or static/theoretical} Approval of safety critical systems using FMEA and single or double failure prevention;% EN298/EN230/UL1998 \item \textbf{SFMEA - Software FMEA --- only used in highly critical systems at present} diff --git a/submission_thesis/CH3_FMEA_criticism/copy.tex b/submission_thesis/CH3_FMEA_criticism/copy.tex index 630f1cc..e3af34b 100644 --- a/submission_thesis/CH3_FMEA_criticism/copy.tex +++ b/submission_thesis/CH3_FMEA_criticism/copy.tex @@ -15,8 +15,9 @@ This analysis philosophy has not changed since FMEA was first used.
\subsection{FMEA does not support modularity.} -It is a common practise in the process control industry to buy in sub-systems, typically sensors and actuators connected to an industrially hardened computer bus, i.e. CANbus~\cite{can,canspec}, modbus~\cite{modbus} etc. -Most sensor systems now are `smart', that is to say, they contain programmatic elements +It is a common practice in the process control industry to buy in sub-systems, +typically sensors and actuators connected to an industrially hardened computer bus, e.g. CANbus~\cite{can,canspec}, modbus~\cite{modbus} etc. +Most sensor systems now are `smart'~\cite{smartinstruments}, that is to say, they contain programmatic elements even if their outputs are %they supply analogue signals. For instance a liquid level sensor that supplies a {\ft} output, would have been typically have been implemented @@ -124,6 +125,11 @@ For highly critical systems i.e. the nuclear industry~\cite{parnas1991assessment the instruments used to perform these measurements, must be analysed using traditional assessment (which entails FMEA), to ensure that failure modes within the instrument cannot lead to invalid measurements. % +Some work has been performed to offer black~box---or functional testing---of these instruments instead of +static analysis~\cite{Bishop:2010:ONT:1886301.1886325}. +However, black box testing of smart instruments is +yet to be an approved method of validation. +% Most modern instruments now use highly integrated electronics coupled to micro-controllers, which read and filter the measurements, and interface to an LCD readout. % @@ -132,11 +138,14 @@ the design of instruments. % While noting that being more modern, these instruments are likely to be more reliable and accurate than the analogue instruments in use some twenty years ago but this cannot be validated -to a high level of reliability by traditional FMEA. +to a high level of reliability.
This remains an unsolved problem for the industries dealing with highly safety critical +systems. %by traditional FMEA. +%to a high level of reliability by traditional FMEA. % Currently the only way that some smart~instruments have been permitted for use in highly critical systems is the have the extensively functionally tested~\cite{bishopsmartinstruments}. +%>>>>>>> 1b3d54f0ec2963017e98c4cdadc9a72a8bac911a \subsection{Distributed real time systems}