Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
ConfidenceScoring.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2013.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hendrik Weisser $
32 // $Authors: Hannes Roest, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_ANALYSIS_OPENSWATH_CONFIDENCESCORING
36 #define OPENMS_ANALYSIS_OPENSWATH_CONFIDENCESCORING
37 
38 #include <cmath> // for "exp"
39 #include <ctime> // for "time" (random number seed)
40 #include <limits> // for "infinity"
41 #include <boost/bimap.hpp>
42 #include <boost/bimap/multiset_of.hpp>
43 #include <boost/random/uniform_int.hpp>
44 #include <boost/random/mersenne_twister.hpp>
45 #include <boost/random/variate_generator.hpp>
46 
50 
54 
55 namespace OpenMS
56 {
57 
58 class OPENMS_DLLAPI ConfidenceScoring :
59  public ProgressLogger
60 {
61 public:
62 
64  ConfidenceScoring(bool test_mode_=false) :
65  generator_(), rand_gen_(generator_, boost::uniform_int<>())
66  {
67  if (!test_mode_) rand_gen_.engine().seed(time(0)); // seed with current time
68  }
69 
70 protected:
71 
73  typedef boost::bimap<DoubleReal, boost::bimaps::multiset_of<DoubleReal> >
75 
77  struct
78  {
82 
83  DoubleReal operator()(DoubleReal diff_rt, DoubleReal dist_int)
84  {
85  DoubleReal lm = intercept + rt_coef * diff_rt * diff_rt +
86  int_coef * dist_int;
87  return 1.0 / (1.0 + exp(-lm));
88  }
89  } glm_;
90 
92  struct
93  {
96 
97  DoubleReal operator()(DoubleReal rt)
98  {
99  return (rt - min_rt) / (max_rt - min_rt) * 100;
100  }
101  } rt_norm_;
102 
103  TargetedExperiment library_; // assay library
104 
105  IntList decoy_index_; // indexes of assays to use as decoys
106 
107  Size n_decoys_; // number of decoys to use (per feature/true assay)
108 
109  Map<String, IntList> transition_map_; // assay (ID) -> transitions (indexes)
110 
111  Size n_transitions_; // number of transitions to consider
112 
115 
116  boost::mt19937 generator_; // random number generation engine
117 
119  boost::variate_generator<boost::mt19937&, boost::uniform_int<> > rand_gen_;
120 
122  void chooseDecoys_();
123 
125  DoubleReal manhattanDist_(DoubleList x, DoubleList y);
126 
128  DoubleReal getAssayRT_(const TargetedExperiment::Peptide& assay,
129  const String& cv_accession = "MS:1000896");
130 
133  void extractIntensities_(BimapType& intensity_map, Size n_transitions,
134  DoubleList& intensities);
135 
139  DoubleReal scoreAssay_(const TargetedExperiment::Peptide& assay,
140  DoubleReal feature_rt, DoubleList& feature_intensities,
141  const std::set<String>& transition_ids = std::set<String>());
142 
144  void scoreFeature_(Feature& feature);
145 
146 public:
147 
148  void initialize(TargetedExperiment library, Size n_decoys, Size n_transitions, TransformationDescription rt_trafo)
149  {
150  library_ = TargetedExperiment(library);
151  n_decoys_ = n_decoys;
152  n_transitions_ = n_transitions;
153  rt_trafo_ = rt_trafo;
154  }
155 
156  void initializeGlm(double intercept, double rt_coef, double int_coef)
157  {
158  glm_.intercept = intercept;
159  glm_.rt_coef = rt_coef;
160  glm_.int_coef = int_coef;
161  }
162 
175  void scoreMap(FeatureMap<> & features)
176  {
177  // are there enough assays in the library?
178  Size n_assays = library_.getPeptides().size();
179  if (n_assays < 2)
180  {
181  throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
182  "There need to be at least 2 assays in the library for ConfidenceScoring.");
183 
184  }
185  if (n_assays - 1 < n_decoys_)
186  {
187  LOG_WARN << "Warning: Parameter 'decoys' (" << n_decoys_
188  << ") is higher than the number of unrelated assays in the "
189  << "library (" << n_assays - 1 << "). "
190  << "Using all unrelated assays as decoys." << std::endl;
191  }
192  if (n_assays - 1 <= n_decoys_) n_decoys_ = 0; // use all available assays
193 
194  decoy_index_.resize(n_assays);
195  for (Size i = 0; i < n_assays; ++i) decoy_index_[i] = boost::numeric_cast<Int>(i);
196 
197  // build mapping between assays and transitions:
198  LOG_DEBUG << "Building transition map..." << std::endl;
199  for (Size i = 0; i < library_.getTransitions().size(); ++i)
200  {
201  const String& ref = library_.getTransitions()[i].getPeptideRef();
202  transition_map_[ref].push_back(boost::numeric_cast<Int>(i));
203  }
204  // find min./max. RT in the library:
205  LOG_DEBUG << "Determining retention time range..." << std::endl;
206  rt_norm_.min_rt = std::numeric_limits<double>::infinity();
207  rt_norm_.max_rt = -std::numeric_limits<double>::infinity();
208  for (std::vector<TargetedExperiment::Peptide>::const_iterator it =
209  library_.getPeptides().begin(); it != library_.getPeptides().end();
210  ++it)
211  {
212  DoubleReal current_rt = getAssayRT_(*it);
213  if (current_rt == -1.0) continue; // indicates a missing value
214  rt_norm_.min_rt = std::min(rt_norm_.min_rt, current_rt);
215  rt_norm_.max_rt = std::max(rt_norm_.max_rt, current_rt);
216  }
217 
218  // log scoring progress:
219  LOG_DEBUG << "Scoring features..." << std::endl;
220  startProgress(0, features.size(), "scoring features");
221 
222  for (FeatureMap<>::Iterator feat_it = features.begin();
223  feat_it != features.end(); ++feat_it)
224  {
225  LOG_DEBUG << "Feature " << feat_it - features.begin() + 1
226  << " (ID '" << feat_it->getUniqueId() << "')"<< std::endl;
227  scoreFeature_(*feat_it);
228  setProgress(feat_it - features.begin());
229  }
230  endProgress();
231 
232  }
233 
234 };
235 
236 }
237 
238 #endif // OPENMS_ANALYSIS_OPENSWATH_CONFIDENCESCORING
DoubleReal rt_coef
Definition: ConfidenceScoring.h:80
A more convenient string class.
Definition: String.h:56
void initializeGlm(double intercept, double rt_coef, double int_coef)
Definition: ConfidenceScoring.h:156
DoubleReal min_rt
Definition: ConfidenceScoring.h:94
DoubleReal int_coef
Definition: ConfidenceScoring.h:81
void scoreMap(FeatureMap<> &features)
Score a feature map -> make sure the class is properly initialized.
Definition: ConfidenceScoring.h:175
Map< String, IntList > transition_map_
Definition: ConfidenceScoring.h:109
#define LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:459
#define LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged...
Definition: LogStream.h:451
TargetedExperiment library_
Definition: ConfidenceScoring.h:103
TransformationDescription rt_trafo_
RT transformation to map measured RTs to assay RTs.
Definition: ConfidenceScoring.h:114
boost::mt19937 generator_
Definition: ConfidenceScoring.h:116
DoubleReal max_rt
Definition: ConfidenceScoring.h:95
A method or algorithm argument contains illegal values.
Definition: Exception.h:634
ConfidenceScoring(bool test_mode_=false)
Constructor.
Definition: ConfidenceScoring.h:64
DoubleReal intercept
Definition: ConfidenceScoring.h:79
IntList decoy_index_
Definition: ConfidenceScoring.h:105
An LC-MS feature.
Definition: Feature.h:66
void initialize(TargetedExperiment library, Size n_decoys, Size n_transitions, TransformationDescription rt_trafo)
Definition: ConfidenceScoring.h:148
Definition: ConfidenceScoring.h:58
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:144
Size n_decoys_
Definition: ConfidenceScoring.h:107
Size n_transitions_
Definition: ConfidenceScoring.h:111
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:56
This class stores a prediction of an SRM/MRM transition.
Definition: TargetedExperiment.h:53
Generic description of a coordinate transformation.
Definition: TransformationDescription.h:59
boost::variate_generator< boost::mt19937 &, boost::uniform_int<> > rand_gen_
Random number generator (must be initialized in init. list of c'tor!)
Definition: ConfidenceScoring.h:119
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:50
UInt64 getUniqueId() const
Non-mutable access to unique id - returns the unique id.
Definition: UniqueIdInterface.h:109
DoubleReal list.
Definition: DoubleList.h:56
boost::bimap< DoubleReal, boost::bimaps::multiset_of< DoubleReal > > BimapType
Mapping: Q3 m/z <-> transition intensity (maybe not unique!)
Definition: ConfidenceScoring.h:74
Definition: TargetedExperimentHelper.h:211
Int list.
Definition: IntList.h:56

OpenMS / TOPP release 1.11.1 Documentation generated on Thu Nov 14 2013 11:19:12 using doxygen 1.8.5