Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
SuffixArraySeqan.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2013.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Clemens Groepl,Andreas Bertsch$
32 // $Authors: Chris Bauer $
33 // --------------------------------------------------------------------------
34 
35 
36 
37 #ifndef OPENMS_DATASTRUCTURES_SUFFIXARRAYSEQAN_H
38 #define OPENMS_DATASTRUCTURES_SUFFIXARRAYSEQAN_H
39 
40 #include <vector>
45 
46 
47 
48 namespace OpenMS
49 {
50 
58  class OPENMS_DLLAPI SuffixArraySeqan :
59  public SuffixArray
60  , public WeightWrapper
61  {
62 
63  typedef seqan::TopDown<seqan::ParentLinks<> > TIterSpec;
64  typedef seqan::Index<seqan::String<char>, seqan::IndexEsa<TIterSpec> > TIndex;
65  typedef seqan::Iter<TIndex, seqan::VSTree<TIterSpec> > TIter;
66 
67  // TODO ??? was: typedef seqan::Index<seqan::String<char>, seqan::Index_ESA<seqan::TopDown<seqan::ParentLinks<seqan::Preorder> > > > TIndex;
68 
69 public:
70 
79  SuffixArraySeqan(const String & st, const String & filename, const WeightWrapper::WEIGHTMODE weight_mode = WeightWrapper::MONO);
80 
84  SuffixArraySeqan(const SuffixArraySeqan & source);
85 
89  virtual ~SuffixArraySeqan();
90 
94  String toString();
95 
104  void findSpec(std::vector<std::vector<std::pair<std::pair<SignedSize, SignedSize>, DoubleReal> > > & candidates, const std::vector<DoubleReal> & spec);
105 
112  bool save(const String & filename);
113 
121  bool open(const String & filename);
122 
128  void setTolerance(DoubleReal t);
129 
134  DoubleReal getTolerance() const;
135 
142  bool isDigestingEnd(const char aa1, const char aa2) const;
143 
149  void setTags(const std::vector<OpenMS::String> & tags);
150 
155  const std::vector<OpenMS::String> & getTags();
156 
161  void setUseTags(bool use_tags);
162 
167  bool getUseTags();
168 
173  void setNumberOfModifications(Size number_of_mods);
174 
179  Size getNumberOfModifications();
180 
181  void printStatistic();
182 
183 protected:
184 
197  inline void goNextSubTree_(TIter & it, DoubleReal & m, std::stack<DoubleReal> & allm, std::stack<std::map<DoubleReal, SignedSize> > & mod_map)
198  {
199  // preorder dfs
200  if (!goRight(it))
201  {
202  while (true)
203  {
204  if (goUp(it))
205  {
206  m -= allm.top();
207  allm.pop();
208  mod_map.pop();
209  }
210  else
211  {
212  break;
213  }
214 
215  if (goRight(it))
216  {
217  m -= allm.top();
218  allm.pop();
219  mod_map.pop();
220  break;
221  }
222  }
223  }
224  else
225  {
226  m -= allm.top();
227  allm.pop();
228  mod_map.pop();
229  }
230  if (isRoot(it))
231  {
232  clear(it);
233  }
234  }
235 
241  inline void goNextSubTree_(TIter & it)
242  {
243  // preorder dfs
244  if (!goRight(it))
245  {
246  while (true)
247  {
248  if (!goUp(it))
249  {
250  break;
251  }
252  if (goRight(it))
253  {
254  break;
255  }
256  }
257  }
258  if (isRoot(it))
259  {
260  clear(it);
261  }
262  }
263 
276  inline void goNext_(TIter & it, DoubleReal & m, std::stack<DoubleReal> & allm, std::stack<std::map<DoubleReal, SignedSize> > & mod_map)
277  {
278  // preorder dfs
279  if (!goDown(it))
280  {
281  goNextSubTree_(it, m, allm, mod_map);
282  }
283  }
284 
285  inline void parseTree_(TIter & it, std::vector<std::pair<SignedSize, SignedSize> > & out_number, std::vector<std::pair<SignedSize, SignedSize> > & edge_length, std::vector<SignedSize> & leafe_depth)
286  {
287  SignedSize depth = 1;
288  while (!atEnd(it))
289  {
290  SignedSize le = 0;
291  bool isLeaf = false;
292  if (length(parentEdgeLabel(it)) > 0)
293  {
294  if (countChildren(it) > 0)
295  {
296  edge_length.push_back(std::pair<SignedSize, SignedSize>(depth, length(parentEdgeLabel(it))));
297  }
298  else
299  {
300  //le <- length(representative(it));
301  //isLeaf = true;
302  }
303  }
304  if (countChildren(it) > 0)
305  {
306  out_number.push_back(std::pair<SignedSize, SignedSize>(depth, countChildren(it)));
307  }
308  else
309  {
310  leafe_depth.push_back(depth);
311  }
312  if (goDown(it))
313  {
314  depth++;
315  }
316  else if (!goRight(it))
317  {
318  while (!goRight(it))
319  {
320  goUp(it);
321  if (isLeaf)
322  {
323  edge_length.push_back(std::pair<SignedSize, SignedSize>(depth, le - length(parentEdgeLabel(it))));
324  isLeaf = false;
325  }
326  depth--;
327  if (isRoot(it)) return;
328  }
329  }
330  else
331  {
332  }
333  }
334  }
335 
337 
338  TIter * it_;
339 
347  SignedSize findFirst_(const std::vector<DoubleReal> & spec, DoubleReal & m);
348 
358  SignedSize findFirst_(const std::vector<DoubleReal> & spec, DoubleReal & m, SignedSize start, SignedSize end);
359 
360  const String & s_;
361 
362  DoubleReal masse_[255];
363 
365 
366  std::vector<String> tags_;
367 
368  bool use_tags_;
369 
371  };
372 }
373 
374 #endif //OPENMS_DATASTRUCTURES_SUFFIXARRAYSEQAN_H
A more convenient string class.
Definition: String.h:56
Class that uses SEQAN library for a suffix array. It can be used to find peptide Candidates for a MS ...
Definition: SuffixArraySeqan.h:58
bool use_tags_
if tags are used
Definition: SuffixArraySeqan.h:368
const String & s_
reference to the string for which the suffix array is built
Definition: SuffixArraySeqan.h:360
seqan::Index< seqan::String< char >, seqan::IndexEsa< TIterSpec > > TIndex
Definition: SuffixArraySeqan.h:64
seqan::Iter< TIndex, seqan::VSTree< TIterSpec > > TIter
Definition: SuffixArraySeqan.h:65
TIter * it_
seqan suffix array iterator
Definition: SuffixArraySeqan.h:338
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:151
seqan::TopDown< seqan::ParentLinks<> > TIterSpec
Definition: SuffixArraySeqan.h:63
SignedSize number_of_modifications_
number of allowed modifications
Definition: SuffixArraySeqan.h:364
void goNextSubTree_(TIter &it)
goes to the next sub tree
Definition: SuffixArraySeqan.h:241
WEIGHTMODE
Definition: WeightWrapper.h:55
Definition: WeightWrapper.h:55
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:144
std::vector< String > tags_
all tags
Definition: SuffixArraySeqan.h:366
void parseTree_(TIter &it, std::vector< std::pair< SignedSize, SignedSize > > &out_number, std::vector< std::pair< SignedSize, SignedSize > > &edge_length, std::vector< SignedSize > &leafe_depth)
Definition: SuffixArraySeqan.h:285
TIndex index_
seqan suffix array
Definition: SuffixArraySeqan.h:336
void goNext_(TIter &it, DoubleReal &m, std::stack< DoubleReal > &allm, std::stack< std::map< DoubleReal, SignedSize > > &mod_map)
overwriting goNext from seqan index_esa_stree.h for mass update during suffix array traversal ...
Definition: SuffixArraySeqan.h:276
Encapsulated weight queries to simplify mono vs average weight computation.
Definition: WeightWrapper.h:50
void goNextSubTree_(TIter &it, DoubleReal &m, std::stack< DoubleReal > &allm, std::stack< std::map< DoubleReal, SignedSize > > &mod_map)
overwriting goNextSubTree_ from seqan index_esa_stree.h for mass update during suffix array traversal...
Definition: SuffixArraySeqan.h:197
DoubleReal tol_
tolerance
Definition: SuffixArraySeqan.h:370
abstract class for suffix array
Definition: SuffixArray.h:51

OpenMS / TOPP release 1.11.1 Documentation generated on Thu Nov 14 2013 11:19:21 using doxygen 1.8.5