Clover icon

Coverage Report

  1. Project Clover database Mon Nov 11 2024 17:27:16 GMT
  2. Package jalview.datamodel.features

File FeatureStore.java

 

Coverage histogram

../../../img/srcFileCovDistChart10.png
0% of files have more coverage

Code metrics

118
186
27
1
851
434
97
0.52
6.89
27
3.59

Classes

Class Line # Actions
FeatureStore 43 186 97
0.9546827795.5%
 

Contributing tests

This file is covered by 199 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.datamodel.features;
22   
23    import java.util.ArrayList;
24    import java.util.Collections;
25    import java.util.HashSet;
26    import java.util.List;
27    import java.util.Set;
28   
29    import intervalstore.api.IntervalStoreI;
30    import intervalstore.impl.BinarySearcher;
31    import intervalstore.impl.BinarySearcher.Compare;
32    import intervalstore.impl.IntervalStore;
33    import jalview.datamodel.SequenceFeature;
34   
35    /**
36    * A data store for a set of sequence features that supports efficient lookup of
37    * features overlapping a given range. Intended for (but not limited to) storage
38    * of features for one sequence and feature type.
39    *
40    * @author gmcarstairs
41    *
42    */
 
43    public class FeatureStore
44    {
45    /*
46    * Non-positional features have no (zero) start/end position.
47    * Kept as a separate list in case this criterion changes in future.
48    */
49    List<SequenceFeature> nonPositionalFeatures;
50   
51    /*
52    * contact features ordered by first contact position
53    */
54    List<SequenceFeature> contactFeatureStarts;
55   
56    /*
57    * contact features ordered by second contact position
58    */
59    List<SequenceFeature> contactFeatureEnds;
60   
61    /*
62    * IntervalStore holds remaining features and provides efficient
63    * query for features overlapping any given interval
64    */
65    IntervalStoreI<SequenceFeature> features;
66   
67    /*
68    * Feature groups represented in stored positional features
69    * (possibly including null)
70    */
71    Set<String> positionalFeatureGroups;
72   
73    /*
74    * Feature groups represented in stored non-positional features
75    * (possibly including null)
76    */
77    Set<String> nonPositionalFeatureGroups;
78   
79    /*
80    * the total length of all positional features; contact features count 1 to
81    * the total and 1 to size(), consistent with an average 'feature length' of 1
82    */
83    int totalExtent;
84   
85    float positionalMinScore;
86   
87    float positionalMaxScore;
88   
89    float nonPositionalMinScore;
90   
91    float nonPositionalMaxScore;
92   
93    /**
94    * Constructor
95    */
 
96  2581 toggle public FeatureStore()
97    {
98  2581 features = new IntervalStore<>();
99  2581 positionalFeatureGroups = new HashSet<>();
100  2581 nonPositionalFeatureGroups = new HashSet<>();
101  2581 positionalMinScore = Float.NaN;
102  2581 positionalMaxScore = Float.NaN;
103  2581 nonPositionalMinScore = Float.NaN;
104  2581 nonPositionalMaxScore = Float.NaN;
105   
106    // we only construct nonPositionalFeatures, contactFeatures if we need to
107    }
108   
109    /**
110    * Adds one sequence feature to the store, and returns true, unless the
111    * feature is already contained in the store, in which case this method
112    * returns false. Containment is determined by SequenceFeature.equals()
113    * comparison.
114    *
115    * @param feature
116    */
 
117  212489 toggle public boolean addFeature(SequenceFeature feature)
118    {
119  212489 if (contains(feature))
120    {
121  78483 return false;
122    }
123   
124    /*
125    * keep a record of feature groups
126    */
127  134006 if (!feature.isNonPositional())
128    {
129  133934 positionalFeatureGroups.add(feature.getFeatureGroup());
130    }
131   
132  134006 if (feature.isContactFeature())
133    {
134  45 addContactFeature(feature);
135    }
136  133961 else if (feature.isNonPositional())
137    {
138  72 addNonPositionalFeature(feature);
139    }
140    else
141    {
142  133889 addNestedFeature(feature);
143    }
144   
145    /*
146    * record the total extent of positional features, to make
147    * getTotalFeatureLength possible; we count the length of a
148    * contact feature as 1
149    */
150  134006 totalExtent += getFeatureLength(feature);
151   
152    /*
153    * record the minimum and maximum score for positional
154    * and non-positional features
155    */
156  134006 float score = feature.getScore();
157  134006 if (!Float.isNaN(score))
158    {
159  133862 if (feature.isNonPositional())
160    {
161  49 nonPositionalMinScore = min(nonPositionalMinScore, score);
162  49 nonPositionalMaxScore = max(nonPositionalMaxScore, score);
163    }
164    else
165    {
166  133813 positionalMinScore = min(positionalMinScore, score);
167  133813 positionalMaxScore = max(positionalMaxScore, score);
168    }
169    }
170   
171  134006 return true;
172    }
173   
174    /**
175    * Answers true if this store contains the given feature (testing by
176    * SequenceFeature.equals), else false
177    *
178    * @param feature
179    * @return
180    */
 
181  212502 toggle public boolean contains(SequenceFeature feature)
182    {
183  212502 if (feature.isNonPositional())
184    {
185  80 return nonPositionalFeatures == null ? false
186    : nonPositionalFeatures.contains(feature);
187    }
188   
189  212422 if (feature.isContactFeature())
190    {
191  49 return contactFeatureStarts == null ? false
192    : listContains(contactFeatureStarts, feature);
193    }
194   
195  212373 return features == null ? false : features.contains(feature);
196    }
197   
198    /**
199    * Answers the 'length' of the feature, counting 0 for non-positional features
200    * and 1 for contact features
201    *
202    * @param feature
203    * @return
204    */
 
205  137058 toggle protected static int getFeatureLength(SequenceFeature feature)
206    {
207  137058 if (feature.isNonPositional())
208    {
209  73 return 0;
210    }
211  136985 if (feature.isContactFeature())
212    {
213  65 return 1;
214    }
215  136920 return 1 + feature.getEnd() - feature.getBegin();
216    }
217   
218    /**
219    * Adds the feature to the list of non-positional features (with lazy
220    * instantiation of the list if it is null), and returns true. The feature
221    * group is added to the set of distinct feature groups for non-positional
222    * features. This method allows duplicate features, so test before calling to
223    * prevent this.
224    *
225    * @param feature
226    */
 
227  72 toggle protected boolean addNonPositionalFeature(SequenceFeature feature)
228    {
229  72 if (nonPositionalFeatures == null)
230    {
231  67 nonPositionalFeatures = new ArrayList<>();
232    }
233   
234  72 nonPositionalFeatures.add(feature);
235   
236  72 nonPositionalFeatureGroups.add(feature.getFeatureGroup());
237   
238  72 return true;
239    }
240   
241    /**
242    * Adds one feature to the IntervalStore that can manage nested features
243    * (creating the IntervalStore if necessary)
244    */
 
245  133889 toggle protected synchronized void addNestedFeature(SequenceFeature feature)
246    {
247  133889 if (features == null)
248    {
249  0 features = new IntervalStore<>();
250    }
251  133889 features.add(feature);
252    }
253   
254    /**
255    * Add a contact feature to the lists that hold them ordered by start (first
256    * contact) and by end (second contact) position, ensuring the lists remain
257    * ordered, and returns true. This method allows duplicate features to be
258    * added, so test before calling to avoid this.
259    *
260    * @param feature
261    * @return
262    */
 
263  45 toggle protected synchronized boolean addContactFeature(SequenceFeature feature)
264    {
265  45 if (contactFeatureStarts == null)
266    {
267  26 contactFeatureStarts = new ArrayList<>();
268    }
269  45 if (contactFeatureEnds == null)
270    {
271  26 contactFeatureEnds = new ArrayList<>();
272    }
273   
274    /*
275    * insert into list sorted by start (first contact position):
276    * binary search the sorted list to find the insertion point
277    */
278  45 int insertPosition = BinarySearcher.findFirst(contactFeatureStarts,
279    true, Compare.GE, feature.getBegin());
280  45 contactFeatureStarts.add(insertPosition, feature);
281   
282    /*
283    * insert into list sorted by end (second contact position):
284    * binary search the sorted list to find the insertion point
285    */
286  45 insertPosition = BinarySearcher.findFirst(contactFeatureEnds, false,
287    Compare.GE, feature.getEnd());
288  45 contactFeatureEnds.add(insertPosition, feature);
289   
290  45 return true;
291    }
292   
293    /**
294    * Answers true if the list contains the feature, else false. This method is
295    * optimised for the condition that the list is sorted on feature start
296    * position ascending, and will give unreliable results if this does not hold.
297    *
298    * @param features
299    * @param feature
300    * @return
301    */
 
302  29 toggle protected static boolean listContains(List<SequenceFeature> features,
303    SequenceFeature feature)
304    {
305  29 if (features == null || feature == null)
306    {
307  3 return false;
308    }
309   
310    /*
311    * locate the first entry in the list which does not precede the feature
312    */
313    // int pos = binarySearch(features,
314    // SearchCriterion.byFeature(feature, RangeComparator.BY_START_POSITION));
315  26 int pos = BinarySearcher.findFirst(features, true, Compare.GE,
316    feature.getBegin());
317  26 int len = features.size();
318  35 while (pos < len)
319    {
320  18 SequenceFeature sf = features.get(pos);
321  18 if (sf.getBegin() > feature.getBegin())
322    {
323  5 return false; // no match found
324    }
325  13 if (sf.equals(feature))
326    {
327  4 return true;
328    }
329  9 pos++;
330    }
331  17 return false;
332    }
333   
334    /**
335    * Returns a (possibly empty) list of features whose extent overlaps the given
336    * range. The returned list is not ordered. Contact features are included if
337    * either of the contact points lies within the range.
338    *
339    * @param start
340    * start position of overlap range (inclusive)
341    * @param end
342    * end position of overlap range (inclusive)
343    * @return
344    */
 
345  89250 toggle public List<SequenceFeature> findOverlappingFeatures(long start, long end)
346    {
347  89250 List<SequenceFeature> result = new ArrayList<>();
348   
349  89250 findContactFeatures(start, end, result);
350   
351  89250 if (features != null)
352    {
353  89250 result.addAll(features.findOverlaps(start, end));
354    }
355   
356  89250 return result;
357    }
358   
359    /**
360    * Adds contact features to the result list where either the second or the
361    * first contact position lies within the target range
362    *
363    * @param from
364    * @param to
365    * @param result
366    */
 
367  89250 toggle protected void findContactFeatures(long from, long to,
368    List<SequenceFeature> result)
369    {
370  89250 if (contactFeatureStarts != null)
371    {
372  41 findContactStartOverlaps(from, to, result);
373    }
374  89250 if (contactFeatureEnds != null)
375    {
376  41 findContactEndOverlaps(from, to, result);
377    }
378    }
379   
380    /**
381    * Adds to the result list any contact features whose end (second contact
382    * point), but not start (first contact point), lies in the query from-to
383    * range
384    *
385    * @param from
386    * @param to
387    * @param result
388    */
 
389  41 toggle protected void findContactEndOverlaps(long from, long to,
390    List<SequenceFeature> result)
391    {
392    /*
393    * find the first contact feature (if any)
394    * whose end point is not before the target range
395    */
396  41 int index = BinarySearcher.findFirst(contactFeatureEnds, false,
397    Compare.GE, (int) from);
398   
399  67 while (index < contactFeatureEnds.size())
400    {
401  52 SequenceFeature sf = contactFeatureEnds.get(index);
402  52 if (!sf.isContactFeature())
403    {
404  0 jalview.bin.Console.errPrintln("Error! non-contact feature type "
405    + sf.getType() + " in contact features list");
406  0 index++;
407  0 continue;
408    }
409   
410  52 int begin = sf.getBegin();
411  52 if (begin >= from && begin <= to)
412    {
413    /*
414    * this feature's first contact position lies in the search range
415    * so we don't include it in results a second time
416    */
417  10 index++;
418  10 continue;
419    }
420   
421  42 if (sf.getEnd() > to)
422    {
423    /*
424    * this feature (and all following) has end point after the target range
425    */
426  26 break;
427    }
428   
429    /*
430    * feature has end >= from and end <= to
431    * i.e. contact end point lies within overlap search range
432    */
433  16 result.add(sf);
434  16 index++;
435    }
436    }
437   
438    /**
439    * Adds contact features whose start position lies in the from-to range to the
440    * result list
441    *
442    * @param from
443    * @param to
444    * @param result
445    */
 
446  41 toggle protected void findContactStartOverlaps(long from, long to,
447    List<SequenceFeature> result)
448    {
449  41 int index = BinarySearcher.findFirst(contactFeatureStarts, true,
450    Compare.GE, (int) from);
451   
452  51 while (index < contactFeatureStarts.size())
453    {
454  20 SequenceFeature sf = contactFeatureStarts.get(index);
455  20 if (!sf.isContactFeature())
456    {
457  0 jalview.bin.Console.errPrintln("Error! non-contact feature "
458    + sf.toString() + " in contact features list");
459  0 index++;
460  0 continue;
461    }
462  20 if (sf.getBegin() > to)
463    {
464    /*
465    * this feature's start (and all following) follows the target range
466    */
467  10 break;
468    }
469   
470    /*
471    * feature has begin >= from and begin <= to
472    * i.e. contact start point lies within overlap search range
473    */
474  10 result.add(sf);
475  10 index++;
476    }
477    }
478   
479    /**
480    * Answers a list of all positional features stored, in no guaranteed order
481    *
482    * @return
483    */
 
484  4129 toggle public List<SequenceFeature> getPositionalFeatures()
485    {
486  4129 List<SequenceFeature> result = new ArrayList<>();
487   
488    /*
489    * add any contact features - from the list by start position
490    */
491  4129 if (contactFeatureStarts != null)
492    {
493  64 result.addAll(contactFeatureStarts);
494    }
495   
496    /*
497    * add any nested features
498    */
499  4129 if (features != null)
500    {
501  4129 result.addAll(features);
502    }
503   
504  4129 return result;
505    }
506   
507    /**
508    * Answers a list of all contact features. If there are none, returns an
509    * immutable empty list.
510    *
511    * @return
512    */
 
513  19 toggle public List<SequenceFeature> getContactFeatures()
514    {
515  19 if (contactFeatureStarts == null)
516    {
517  14 return Collections.emptyList();
518    }
519  5 return new ArrayList<>(contactFeatureStarts);
520    }
521   
522    /**
523    * Answers a list of all non-positional features. If there are none, returns
524    * an immutable empty list.
525    *
526    * @return
527    */
 
528  4015 toggle public List<SequenceFeature> getNonPositionalFeatures()
529    {
530  4015 if (nonPositionalFeatures == null)
531    {
532  3892 return Collections.emptyList();
533    }
534  123 return new ArrayList<>(nonPositionalFeatures);
535    }
536   
537    /**
538    * Deletes the given feature from the store, returning true if it was found
539    * (and deleted), else false. This method makes no assumption that the feature
540    * is in the 'expected' place in the store, in case it has been modified since
541    * it was added.
542    *
543    * @param sf
544    */
 
545  400 toggle public synchronized boolean delete(SequenceFeature sf)
546    {
547  400 boolean removed = false;
548   
549    /*
550    * try contact positions (and if found, delete
551    * from both lists of contact positions)
552    */
553  400 if (!removed && contactFeatureStarts != null)
554    {
555  57 removed = contactFeatureStarts.remove(sf);
556  57 if (removed)
557    {
558  14 contactFeatureEnds.remove(sf);
559    }
560    }
561   
562  400 boolean removedNonPositional = false;
563   
564    /*
565    * if not found, try non-positional features
566    */
567  400 if (!removed && nonPositionalFeatures != null)
568    {
569  59 removedNonPositional = nonPositionalFeatures.remove(sf);
570  59 removed = removedNonPositional;
571    }
572   
573    /*
574    * if not found, try nested features
575    */
576  400 if (!removed && features != null)
577    {
578  369 removed = features.remove(sf);
579    }
580   
581  400 if (removed)
582    {
583  297 rescanAfterDelete();
584    }
585   
586  400 return removed;
587    }
588   
589    /**
590    * Rescan all features to recompute any cached values after an entry has been
591    * deleted. This is expected to be an infrequent event, so performance here is
592    * not critical.
593    */
 
594  297 toggle protected synchronized void rescanAfterDelete()
595    {
596  297 positionalFeatureGroups.clear();
597  297 nonPositionalFeatureGroups.clear();
598  297 totalExtent = 0;
599  297 positionalMinScore = Float.NaN;
600  297 positionalMaxScore = Float.NaN;
601  297 nonPositionalMinScore = Float.NaN;
602  297 nonPositionalMaxScore = Float.NaN;
603   
604    /*
605    * scan non-positional features for groups and scores
606    */
607  297 for (SequenceFeature sf : getNonPositionalFeatures())
608    {
609  19 nonPositionalFeatureGroups.add(sf.getFeatureGroup());
610  19 float score = sf.getScore();
611  19 nonPositionalMinScore = min(nonPositionalMinScore, score);
612  19 nonPositionalMaxScore = max(nonPositionalMaxScore, score);
613    }
614   
615    /*
616    * scan positional features for groups, scores and extents
617    */
618  297 for (SequenceFeature sf : getPositionalFeatures())
619    {
620  3049 positionalFeatureGroups.add(sf.getFeatureGroup());
621  3049 float score = sf.getScore();
622  3049 positionalMinScore = min(positionalMinScore, score);
623  3049 positionalMaxScore = max(positionalMaxScore, score);
624  3049 totalExtent += getFeatureLength(sf);
625    }
626    }
627   
628    /**
629    * A helper method to return the minimum of two floats, where a non-NaN value
630    * is treated as 'less than' a NaN value (unlike Math.min which does the
631    * opposite)
632    *
633    * @param f1
634    * @param f2
635    */
 
636  136934 toggle protected static float min(float f1, float f2)
637    {
638  136934 if (Float.isNaN(f1))
639    {
640  2817 return Float.isNaN(f2) ? f1 : f2;
641    }
642    else
643    {
644  134117 return Float.isNaN(f2) ? f1 : Math.min(f1, f2);
645    }
646    }
647   
648    /**
649    * A helper method to return the maximum of two floats, where a non-NaN value
650    * is treated as 'greater than' a NaN value (unlike Math.max which does the
651    * opposite)
652    *
653    * @param f1
654    * @param f2
655    */
 
656  136934 toggle protected static float max(float f1, float f2)
657    {
658  136934 if (Float.isNaN(f1))
659    {
660  2817 return Float.isNaN(f2) ? f1 : f2;
661    }
662    else
663    {
664  134117 return Float.isNaN(f2) ? f1 : Math.max(f1, f2);
665    }
666    }
667   
668    /**
669    * Answers true if this store has no features, else false
670    *
671    * @return
672    */
 
673  724 toggle public boolean isEmpty()
674    {
675  724 boolean hasFeatures = (contactFeatureStarts != null
676    && !contactFeatureStarts.isEmpty())
677    || (nonPositionalFeatures != null
678    && !nonPositionalFeatures.isEmpty())
679    || (features != null && features.size() > 0);
680   
681  724 return !hasFeatures;
682    }
683   
684    /**
685    * Answers the set of distinct feature groups stored, possibly including null,
686    * as an unmodifiable view of the set. The parameter determines whether the
687    * groups for positional or for non-positional features are returned.
688    *
689    * @param positionalFeatures
690    * @return
691    */
 
692  6418 toggle public Set<String> getFeatureGroups(boolean positionalFeatures)
693    {
694  6418 if (positionalFeatures)
695    {
696  6344 return Collections.unmodifiableSet(positionalFeatureGroups);
697    }
698    else
699    {
700  74 return nonPositionalFeatureGroups == null
701    ? Collections.<String> emptySet()
702    : Collections.unmodifiableSet(nonPositionalFeatureGroups);
703    }
704    }
705   
706    /**
707    * Answers the number of positional (or non-positional) features stored.
708    * Contact features count as 1.
709    *
710    * @param positional
711    * @return
712    */
 
713  79 toggle public int getFeatureCount(boolean positional)
714    {
715  79 if (!positional)
716    {
717  26 return nonPositionalFeatures == null ? 0
718    : nonPositionalFeatures.size();
719    }
720   
721  53 int size = 0;
722   
723  53 if (contactFeatureStarts != null)
724    {
725    // note a contact feature (start/end) counts as one
726  14 size += contactFeatureStarts.size();
727    }
728   
729  53 if (features != null)
730    {
731  53 size += features.size();
732    }
733   
734  53 return size;
735    }
736   
737    /**
738    * Answers the total length of positional features (or zero if there are
739    * none). Contact features contribute a value of 1 to the total.
740    *
741    * @return
742    */
 
743  42 toggle public int getTotalFeatureLength()
744    {
745  42 return totalExtent;
746    }
747   
748    /**
749    * Answers the minimum score held for positional or non-positional features.
750    * This may be Float.NaN if there are no features, are none has a non-NaN
751    * score.
752    *
753    * @param positional
754    * @return
755    */
 
756  2111 toggle public float getMinimumScore(boolean positional)
757    {
758  2111 return positional ? positionalMinScore : nonPositionalMinScore;
759    }
760   
761    /**
762    * Answers the maximum score held for positional or non-positional features.
763    * This may be Float.NaN if there are no features, are none has a non-NaN
764    * score.
765    *
766    * @param positional
767    * @return
768    */
 
769  2085 toggle public float getMaximumScore(boolean positional)
770    {
771  2085 return positional ? positionalMaxScore : nonPositionalMaxScore;
772    }
773   
774    /**
775    * Answers a list of all either positional or non-positional features whose
776    * feature group matches the given group (which may be null)
777    *
778    * @param positional
779    * @param group
780    * @return
781    */
 
782  48 toggle public List<SequenceFeature> getFeaturesForGroup(boolean positional,
783    String group)
784    {
785  48 List<SequenceFeature> result = new ArrayList<>();
786   
787    /*
788    * if we know features don't include the target group, no need
789    * to inspect them for matches
790    */
791  48 if (positional && !positionalFeatureGroups.contains(group)
792    || !positional && !nonPositionalFeatureGroups.contains(group))
793    {
794  7 return result;
795    }
796   
797  41 List<SequenceFeature> sfs = positional ? getPositionalFeatures()
798    : getNonPositionalFeatures();
799  41 for (SequenceFeature sf : sfs)
800    {
801  58 String featureGroup = sf.getFeatureGroup();
802  58 if (group == null && featureGroup == null
803    || group != null && group.equals(featureGroup))
804    {
805  41 result.add(sf);
806    }
807    }
808  41 return result;
809    }
810   
811    /**
812    * Adds the shift amount to the start and end of all positional features whose
813    * start position is at or after fromPosition. Returns true if at least one
814    * feature was shifted, else false.
815    *
816    * @param fromPosition
817    * @param shiftBy
818    * @return
819    */
 
820  45 toggle public synchronized boolean shiftFeatures(int fromPosition, int shiftBy)
821    {
822    /*
823    * Because begin and end are final fields (to ensure the data store's
824    * integrity), we have to delete each feature and re-add it as amended.
825    * (Although a simple shift of all values would preserve data integrity!)
826    */
827  45 boolean modified = false;
828  45 for (SequenceFeature sf : getPositionalFeatures())
829    {
830  250 if (sf.getBegin() >= fromPosition)
831    {
832  40 modified = true;
833  40 int newBegin = sf.getBegin() + shiftBy;
834  40 int newEnd = sf.getEnd() + shiftBy;
835   
836    /*
837    * sanity check: don't shift left of the first residue
838    */
839  40 if (newEnd > 0)
840    {
841  38 newBegin = Math.max(1, newBegin);
842  38 SequenceFeature sf2 = new SequenceFeature(sf, newBegin, newEnd,
843    sf.getFeatureGroup(), sf.getScore());
844  38 addFeature(sf2);
845    }
846  40 delete(sf);
847    }
848    }
849  45 return modified;
850    }
851    }