Clover icon

Coverage Report

  1. Project Clover database Thu Aug 13 2020 12:04:21 BST
  2. Package jalview.datamodel.features

File FeatureStore.java

 

Coverage histogram

../../../img/srcFileCovDistChart10.png
0% of files have more coverage

Code metrics

118
186
27
1
854
436
97
0.52
6.89
27
3.59

Classes

Class Line # Actions
FeatureStore 43 186 97
0.9546827795.5%
 

Contributing tests

This file is covered by 155 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.datamodel.features;
22   
23    import java.util.ArrayList;
24    import java.util.Collections;
25    import java.util.HashSet;
26    import java.util.List;
27    import java.util.Set;
28   
29    import intervalstore.api.IntervalStoreI;
30    import intervalstore.impl.BinarySearcher;
31    import intervalstore.impl.BinarySearcher.Compare;
32    import intervalstore.impl.IntervalStore;
33    import jalview.datamodel.SequenceFeature;
34   
35    /**
36    * A data store for a set of sequence features that supports efficient lookup of
37    * features overlapping a given range. Intended for (but not limited to) storage
38    * of features for one sequence and feature type.
39    *
40    * @author gmcarstairs
41    *
42    */
 
43    public class FeatureStore
44    {
45    /*
46    * Non-positional features have no (zero) start/end position.
47    * Kept as a separate list in case this criterion changes in future.
48    */
49    List<SequenceFeature> nonPositionalFeatures;
50   
51    /*
52    * contact features ordered by first contact position
53    */
54    List<SequenceFeature> contactFeatureStarts;
55   
56    /*
57    * contact features ordered by second contact position
58    */
59    List<SequenceFeature> contactFeatureEnds;
60   
61    /*
62    * IntervalStore holds remaining features and provides efficient
63    * query for features overlapping any given interval
64    */
65    IntervalStoreI<SequenceFeature> features;
66   
67    /*
68    * Feature groups represented in stored positional features
69    * (possibly including null)
70    */
71    Set<String> positionalFeatureGroups;
72   
73    /*
74    * Feature groups represented in stored non-positional features
75    * (possibly including null)
76    */
77    Set<String> nonPositionalFeatureGroups;
78   
79    /*
80    * the total length of all positional features; contact features count 1 to
81    * the total and 1 to size(), consistent with an average 'feature length' of 1
82    */
83    int totalExtent;
84   
85    float positionalMinScore;
86   
87    float positionalMaxScore;
88   
89    float nonPositionalMinScore;
90   
91    float nonPositionalMaxScore;
92   
93    /**
94    * Constructor
95    */
 
96  2267 toggle public FeatureStore()
97    {
98  2267 features = new IntervalStore<>();
99  2267 positionalFeatureGroups = new HashSet<>();
100  2267 nonPositionalFeatureGroups = new HashSet<>();
101  2267 positionalMinScore = Float.NaN;
102  2267 positionalMaxScore = Float.NaN;
103  2267 nonPositionalMinScore = Float.NaN;
104  2267 nonPositionalMaxScore = Float.NaN;
105   
106    // we only construct nonPositionalFeatures, contactFeatures if we need to
107    }
108   
109    /**
110    * Adds one sequence feature to the store, and returns true, unless the
111    * feature is already contained in the store, in which case this method
112    * returns false. Containment is determined by SequenceFeature.equals()
113    * comparison.
114    *
115    * @param feature
116    */
 
117  174380 toggle public boolean addFeature(SequenceFeature feature)
118    {
119  174380 if (contains(feature))
120    {
121  72419 return false;
122    }
123   
124    /*
125    * keep a record of feature groups
126    */
127  101961 if (!feature.isNonPositional())
128    {
129  101898 positionalFeatureGroups.add(feature.getFeatureGroup());
130    }
131   
132  101961 if (feature.isContactFeature())
133    {
134  42 addContactFeature(feature);
135    }
136  101919 else if (feature.isNonPositional())
137    {
138  63 addNonPositionalFeature(feature);
139    }
140    else
141    {
142  101856 addNestedFeature(feature);
143    }
144   
145    /*
146    * record the total extent of positional features, to make
147    * getTotalFeatureLength possible; we count the length of a
148    * contact feature as 1
149    */
150  101961 totalExtent += getFeatureLength(feature);
151   
152    /*
153    * record the minimum and maximum score for positional
154    * and non-positional features
155    */
156  101961 float score = feature.getScore();
157  101961 if (!Float.isNaN(score))
158    {
159  101817 if (feature.isNonPositional())
160    {
161  40 nonPositionalMinScore = min(nonPositionalMinScore, score);
162  40 nonPositionalMaxScore = max(nonPositionalMaxScore, score);
163    }
164    else
165    {
166  101777 positionalMinScore = min(positionalMinScore, score);
167  101777 positionalMaxScore = max(positionalMaxScore, score);
168    }
169    }
170   
171  101961 return true;
172    }
173   
174    /**
175    * Answers true if this store contains the given feature (testing by
176    * SequenceFeature.equals), else false
177    *
178    * @param feature
179    * @return
180    */
 
181  174393 toggle public boolean contains(SequenceFeature feature)
182    {
183  174393 if (feature.isNonPositional())
184    {
185  71 return nonPositionalFeatures == null ? false : nonPositionalFeatures
186    .contains(feature);
187    }
188   
189  174322 if (feature.isContactFeature())
190    {
191  46 return contactFeatureStarts == null ? false : listContains(
192    contactFeatureStarts, feature);
193    }
194   
195  174276 return features == null ? false : features
196    .contains(feature);
197    }
198   
199    /**
200    * Answers the 'length' of the feature, counting 0 for non-positional features
201    * and 1 for contact features
202    *
203    * @param feature
204    * @return
205    */
 
206  105012 toggle protected static int getFeatureLength(SequenceFeature feature)
207    {
208  105012 if (feature.isNonPositional())
209    {
210  64 return 0;
211    }
212  104948 if (feature.isContactFeature())
213    {
214  62 return 1;
215    }
216  104886 return 1 + feature.getEnd() - feature.getBegin();
217    }
218   
219    /**
220    * Adds the feature to the list of non-positional features (with lazy
221    * instantiation of the list if it is null), and returns true. The feature
222    * group is added to the set of distinct feature groups for non-positional
223    * features. This method allows duplicate features, so test before calling to
224    * prevent this.
225    *
226    * @param feature
227    */
 
228  63 toggle protected boolean addNonPositionalFeature(SequenceFeature feature)
229    {
230  63 if (nonPositionalFeatures == null)
231    {
232  58 nonPositionalFeatures = new ArrayList<>();
233    }
234   
235  63 nonPositionalFeatures.add(feature);
236   
237  63 nonPositionalFeatureGroups.add(feature.getFeatureGroup());
238   
239  63 return true;
240    }
241   
242    /**
243    * Adds one feature to the IntervalStore that can manage nested features
244    * (creating the IntervalStore if necessary)
245    */
 
246  101856 toggle protected synchronized void addNestedFeature(SequenceFeature feature)
247    {
248  101856 if (features == null)
249    {
250  0 features = new IntervalStore<>();
251    }
252  101856 features.add(feature);
253    }
254   
255    /**
256    * Add a contact feature to the lists that hold them ordered by start (first
257    * contact) and by end (second contact) position, ensuring the lists remain
258    * ordered, and returns true. This method allows duplicate features to be
259    * added, so test before calling to avoid this.
260    *
261    * @param feature
262    * @return
263    */
 
264  42 toggle protected synchronized boolean addContactFeature(SequenceFeature feature)
265    {
266  42 if (contactFeatureStarts == null)
267    {
268  25 contactFeatureStarts = new ArrayList<>();
269    }
270  42 if (contactFeatureEnds == null)
271    {
272  25 contactFeatureEnds = new ArrayList<>();
273    }
274   
275    /*
276    * insert into list sorted by start (first contact position):
277    * binary search the sorted list to find the insertion point
278    */
279  42 int insertPosition = BinarySearcher.findFirst(contactFeatureStarts,
280    true, Compare.GE, feature.getBegin());
281  42 contactFeatureStarts.add(insertPosition, feature);
282   
283   
284    /*
285    * insert into list sorted by end (second contact position):
286    * binary search the sorted list to find the insertion point
287    */
288  42 insertPosition = BinarySearcher.findFirst(contactFeatureEnds,
289    false, Compare.GE, feature.getEnd());
290  42 contactFeatureEnds.add(insertPosition, feature);
291   
292  42 return true;
293    }
294   
295    /**
296    * Answers true if the list contains the feature, else false. This method is
297    * optimised for the condition that the list is sorted on feature start
298    * position ascending, and will give unreliable results if this does not hold.
299    *
300    * @param features
301    * @param feature
302    * @return
303    */
 
304  27 toggle protected static boolean listContains(List<SequenceFeature> features,
305    SequenceFeature feature)
306    {
307  27 if (features == null || feature == null)
308    {
309  3 return false;
310    }
311   
312    /*
313    * locate the first entry in the list which does not precede the feature
314    */
315    // int pos = binarySearch(features,
316    // SearchCriterion.byFeature(feature, RangeComparator.BY_START_POSITION));
317  24 int pos = BinarySearcher.findFirst(features, true, Compare.GE,
318    feature.getBegin());
319  24 int len = features.size();
320  33 while (pos < len)
321    {
322  18 SequenceFeature sf = features.get(pos);
323  18 if (sf.getBegin() > feature.getBegin())
324    {
325  5 return false; // no match found
326    }
327  13 if (sf.equals(feature))
328    {
329  4 return true;
330    }
331  9 pos++;
332    }
333  15 return false;
334    }
335   
336    /**
337    * Returns a (possibly empty) list of features whose extent overlaps the given
338    * range. The returned list is not ordered. Contact features are included if
339    * either of the contact points lies within the range.
340    *
341    * @param start
342    * start position of overlap range (inclusive)
343    * @param end
344    * end position of overlap range (inclusive)
345    * @return
346    */
 
347  4152 toggle public List<SequenceFeature> findOverlappingFeatures(long start, long end)
348    {
349  4152 List<SequenceFeature> result = new ArrayList<>();
350   
351  4152 findContactFeatures(start, end, result);
352   
353  4152 if (features != null)
354    {
355  4152 result.addAll(features.findOverlaps(start, end));
356    }
357   
358  4152 return result;
359    }
360   
361    /**
362    * Adds contact features to the result list where either the second or the
363    * first contact position lies within the target range
364    *
365    * @param from
366    * @param to
367    * @param result
368    */
 
369  4152 toggle protected void findContactFeatures(long from, long to,
370    List<SequenceFeature> result)
371    {
372  4152 if (contactFeatureStarts != null)
373    {
374  41 findContactStartOverlaps(from, to, result);
375    }
376  4152 if (contactFeatureEnds != null)
377    {
378  41 findContactEndOverlaps(from, to, result);
379    }
380    }
381   
382    /**
383    * Adds to the result list any contact features whose end (second contact
384    * point), but not start (first contact point), lies in the query from-to
385    * range
386    *
387    * @param from
388    * @param to
389    * @param result
390    */
 
391  41 toggle protected void findContactEndOverlaps(long from, long to,
392    List<SequenceFeature> result)
393    {
394    /*
395    * find the first contact feature (if any)
396    * whose end point is not before the target range
397    */
398  41 int index = BinarySearcher.findFirst(contactFeatureEnds,
399    false, Compare.GE, (int) from);
400   
401  67 while (index < contactFeatureEnds.size())
402    {
403  52 SequenceFeature sf = contactFeatureEnds.get(index);
404  52 if (!sf.isContactFeature())
405    {
406  0 System.err.println("Error! non-contact feature type "
407    + sf.getType() + " in contact features list");
408  0 index++;
409  0 continue;
410    }
411   
412  52 int begin = sf.getBegin();
413  52 if (begin >= from && begin <= to)
414    {
415    /*
416    * this feature's first contact position lies in the search range
417    * so we don't include it in results a second time
418    */
419  10 index++;
420  10 continue;
421    }
422   
423  42 if (sf.getEnd() > to)
424    {
425    /*
426    * this feature (and all following) has end point after the target range
427    */
428  26 break;
429    }
430   
431    /*
432    * feature has end >= from and end <= to
433    * i.e. contact end point lies within overlap search range
434    */
435  16 result.add(sf);
436  16 index++;
437    }
438    }
439   
440    /**
441    * Adds contact features whose start position lies in the from-to range to the
442    * result list
443    *
444    * @param from
445    * @param to
446    * @param result
447    */
 
448  41 toggle protected void findContactStartOverlaps(long from, long to,
449    List<SequenceFeature> result)
450    {
451  41 int index = BinarySearcher.findFirst(contactFeatureStarts,
452    true, Compare.GE, (int) from);
453   
454  51 while (index < contactFeatureStarts.size())
455    {
456  20 SequenceFeature sf = contactFeatureStarts.get(index);
457  20 if (!sf.isContactFeature())
458    {
459  0 System.err.println("Error! non-contact feature " + sf.toString()
460    + " in contact features list");
461  0 index++;
462  0 continue;
463    }
464  20 if (sf.getBegin() > to)
465    {
466    /*
467    * this feature's start (and all following) follows the target range
468    */
469  10 break;
470    }
471   
472    /*
473    * feature has begin >= from and begin <= to
474    * i.e. contact start point lies within overlap search range
475    */
476  10 result.add(sf);
477  10 index++;
478    }
479    }
480   
481    /**
482    * Answers a list of all positional features stored, in no guaranteed order
483    *
484    * @return
485    */
 
486  3936 toggle public List<SequenceFeature> getPositionalFeatures()
487    {
488  3936 List<SequenceFeature> result = new ArrayList<>();
489   
490    /*
491    * add any contact features - from the list by start position
492    */
493  3936 if (contactFeatureStarts != null)
494    {
495  63 result.addAll(contactFeatureStarts);
496    }
497   
498    /*
499    * add any nested features
500    */
501  3936 if (features != null)
502    {
503  3936 result.addAll(features);
504    }
505   
506  3936 return result;
507    }
508   
509    /**
510    * Answers a list of all contact features. If there are none, returns an
511    * immutable empty list.
512    *
513    * @return
514    */
 
515  19 toggle public List<SequenceFeature> getContactFeatures()
516    {
517  19 if (contactFeatureStarts == null)
518    {
519  14 return Collections.emptyList();
520    }
521  5 return new ArrayList<>(contactFeatureStarts);
522    }
523   
524    /**
525    * Answers a list of all non-positional features. If there are none, returns
526    * an immutable empty list.
527    *
528    * @return
529    */
 
530  3856 toggle public List<SequenceFeature> getNonPositionalFeatures()
531    {
532  3856 if (nonPositionalFeatures == null)
533    {
534  3736 return Collections.emptyList();
535    }
536  120 return new ArrayList<>(nonPositionalFeatures);
537    }
538   
539    /**
540    * Deletes the given feature from the store, returning true if it was found
541    * (and deleted), else false. This method makes no assumption that the feature
542    * is in the 'expected' place in the store, in case it has been modified since
543    * it was added.
544    *
545    * @param sf
546    */
 
547  390 toggle public synchronized boolean delete(SequenceFeature sf)
548    {
549  390 boolean removed = false;
550   
551    /*
552    * try contact positions (and if found, delete
553    * from both lists of contact positions)
554    */
555  390 if (!removed && contactFeatureStarts != null)
556    {
557  55 removed = contactFeatureStarts.remove(sf);
558  55 if (removed)
559    {
560  14 contactFeatureEnds.remove(sf);
561    }
562    }
563   
564  390 boolean removedNonPositional = false;
565   
566    /*
567    * if not found, try non-positional features
568    */
569  390 if (!removed && nonPositionalFeatures != null)
570    {
571  59 removedNonPositional = nonPositionalFeatures.remove(sf);
572  59 removed = removedNonPositional;
573    }
574   
575    /*
576    * if not found, try nested features
577    */
578  390 if (!removed && features != null)
579    {
580  359 removed = features.remove(sf);
581    }
582   
583  390 if (removed)
584    {
585  295 rescanAfterDelete();
586    }
587   
588  390 return removed;
589    }
590   
591    /**
592    * Rescan all features to recompute any cached values after an entry has been
593    * deleted. This is expected to be an infrequent event, so performance here is
594    * not critical.
595    */
 
596  295 toggle protected synchronized void rescanAfterDelete()
597    {
598  295 positionalFeatureGroups.clear();
599  295 nonPositionalFeatureGroups.clear();
600  295 totalExtent = 0;
601  295 positionalMinScore = Float.NaN;
602  295 positionalMaxScore = Float.NaN;
603  295 nonPositionalMinScore = Float.NaN;
604  295 nonPositionalMaxScore = Float.NaN;
605   
606    /*
607    * scan non-positional features for groups and scores
608    */
609  295 for (SequenceFeature sf : getNonPositionalFeatures())
610    {
611  19 nonPositionalFeatureGroups.add(sf.getFeatureGroup());
612  19 float score = sf.getScore();
613  19 nonPositionalMinScore = min(nonPositionalMinScore, score);
614  19 nonPositionalMaxScore = max(nonPositionalMaxScore, score);
615    }
616   
617    /*
618    * scan positional features for groups, scores and extents
619    */
620  295 for (SequenceFeature sf : getPositionalFeatures())
621    {
622  3048 positionalFeatureGroups.add(sf.getFeatureGroup());
623  3048 float score = sf.getScore();
624  3048 positionalMinScore = min(positionalMinScore, score);
625  3048 positionalMaxScore = max(positionalMaxScore, score);
626  3048 totalExtent += getFeatureLength(sf);
627    }
628    }
629   
630    /**
631    * A helper method to return the minimum of two floats, where a non-NaN value
632    * is treated as 'less than' a NaN value (unlike Math.min which does the
633    * opposite)
634    *
635    * @param f1
636    * @param f2
637    */
 
638  104888 toggle protected static float min(float f1, float f2)
639    {
640  104888 if (Float.isNaN(f1))
641    {
642  2502 return Float.isNaN(f2) ? f1 : f2;
643    }
644    else
645    {
646  102386 return Float.isNaN(f2) ? f1 : Math.min(f1, f2);
647    }
648    }
649   
650    /**
651    * A helper method to return the maximum of two floats, where a non-NaN value
652    * is treated as 'greater than' a NaN value (unlike Math.max which does the
653    * opposite)
654    *
655    * @param f1
656    * @param f2
657    */
 
658  104888 toggle protected static float max(float f1, float f2)
659    {
660  104888 if (Float.isNaN(f1))
661    {
662  2502 return Float.isNaN(f2) ? f1 : f2;
663    }
664    else
665    {
666  102386 return Float.isNaN(f2) ? f1 : Math.max(f1, f2);
667    }
668    }
669   
670    /**
671    * Answers true if this store has no features, else false
672    *
673    * @return
674    */
 
675  1134 toggle public boolean isEmpty()
676    {
677  1134 boolean hasFeatures = (contactFeatureStarts != null
678    && !contactFeatureStarts
679    .isEmpty())
680    || (nonPositionalFeatures != null && !nonPositionalFeatures
681    .isEmpty())
682    || (features != null && features.size() > 0);
683   
684  1134 return !hasFeatures;
685    }
686   
687    /**
688    * Answers the set of distinct feature groups stored, possibly including null,
689    * as an unmodifiable view of the set. The parameter determines whether the
690    * groups for positional or for non-positional features are returned.
691    *
692    * @param positionalFeatures
693    * @return
694    */
 
695  2294 toggle public Set<String> getFeatureGroups(boolean positionalFeatures)
696    {
697  2294 if (positionalFeatures)
698    {
699  2220 return Collections.unmodifiableSet(positionalFeatureGroups);
700    }
701    else
702    {
703  74 return nonPositionalFeatureGroups == null ? Collections
704    .<String> emptySet() : Collections
705    .unmodifiableSet(nonPositionalFeatureGroups);
706    }
707    }
708   
709    /**
710    * Answers the number of positional (or non-positional) features stored.
711    * Contact features count as 1.
712    *
713    * @param positional
714    * @return
715    */
 
716  79 toggle public int getFeatureCount(boolean positional)
717    {
718  79 if (!positional)
719    {
720  26 return nonPositionalFeatures == null ? 0 : nonPositionalFeatures
721    .size();
722    }
723   
724  53 int size = 0;
725   
726  53 if (contactFeatureStarts != null)
727    {
728    // note a contact feature (start/end) counts as one
729  14 size += contactFeatureStarts.size();
730    }
731   
732  53 if (features != null)
733    {
734  53 size += features.size();
735    }
736   
737  53 return size;
738    }
739   
740    /**
741    * Answers the total length of positional features (or zero if there are
742    * none). Contact features contribute a value of 1 to the total.
743    *
744    * @return
745    */
 
746  42 toggle public int getTotalFeatureLength()
747    {
748  42 return totalExtent;
749    }
750   
751    /**
752    * Answers the minimum score held for positional or non-positional features.
753    * This may be Float.NaN if there are no features, are none has a non-NaN
754    * score.
755    *
756    * @param positional
757    * @return
758    */
 
759  722 toggle public float getMinimumScore(boolean positional)
760    {
761  722 return positional ? positionalMinScore : nonPositionalMinScore;
762    }
763   
764    /**
765    * Answers the maximum score held for positional or non-positional features.
766    * This may be Float.NaN if there are no features, are none has a non-NaN
767    * score.
768    *
769    * @param positional
770    * @return
771    */
 
772  696 toggle public float getMaximumScore(boolean positional)
773    {
774  696 return positional ? positionalMaxScore : nonPositionalMaxScore;
775    }
776   
777    /**
778    * Answers a list of all either positional or non-positional features whose
779    * feature group matches the given group (which may be null)
780    *
781    * @param positional
782    * @param group
783    * @return
784    */
 
785  48 toggle public List<SequenceFeature> getFeaturesForGroup(boolean positional,
786    String group)
787    {
788  48 List<SequenceFeature> result = new ArrayList<>();
789   
790    /*
791    * if we know features don't include the target group, no need
792    * to inspect them for matches
793    */
794  48 if (positional && !positionalFeatureGroups.contains(group)
795    || !positional && !nonPositionalFeatureGroups.contains(group))
796    {
797  7 return result;
798    }
799   
800  41 List<SequenceFeature> sfs = positional ? getPositionalFeatures()
801    : getNonPositionalFeatures();
802  41 for (SequenceFeature sf : sfs)
803    {
804  58 String featureGroup = sf.getFeatureGroup();
805  58 if (group == null && featureGroup == null || group != null
806    && group.equals(featureGroup))
807    {
808  41 result.add(sf);
809    }
810    }
811  41 return result;
812    }
813   
814    /**
815    * Adds the shift amount to the start and end of all positional features whose
816    * start position is at or after fromPosition. Returns true if at least one
817    * feature was shifted, else false.
818    *
819    * @param fromPosition
820    * @param shiftBy
821    * @return
822    */
 
823  45 toggle public synchronized boolean shiftFeatures(int fromPosition, int shiftBy)
824    {
825    /*
826    * Because begin and end are final fields (to ensure the data store's
827    * integrity), we have to delete each feature and re-add it as amended.
828    * (Although a simple shift of all values would preserve data integrity!)
829    */
830  45 boolean modified = false;
831  45 for (SequenceFeature sf : getPositionalFeatures())
832    {
833  250 if (sf.getBegin() >= fromPosition)
834    {
835  40 modified = true;
836  40 int newBegin = sf.getBegin() + shiftBy;
837  40 int newEnd = sf.getEnd() + shiftBy;
838   
839    /*
840    * sanity check: don't shift left of the first residue
841    */
842  40 if (newEnd > 0)
843    {
844  38 newBegin = Math.max(1, newBegin);
845  38 SequenceFeature sf2 = new SequenceFeature(sf, newBegin, newEnd,
846    sf.getFeatureGroup(), sf.getScore());
847  38 addFeature(sf2);
848    }
849  40 delete(sf);
850    }
851    }
852  45 return modified;
853    }
854    }