Clover icon

jalviewX

  1. Project Clover database Wed Oct 31 2018 15:13:58 GMT
  2. Package jalview.datamodel.features

File NCList.java

 

Coverage histogram

../../../img/srcFileCovDistChart10.png
0% of files have more coverage

Code metrics

66
153
23
1
646
329
62
0.41
6.65
23
2.7

Classes

Class Line # Actions
NCList 40 153 62 2
0.991735599.2%
 

Contributing tests

This file is covered by 42 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.datamodel.features;
22   
23    import jalview.datamodel.ContiguousI;
24    import jalview.datamodel.Range;
25   
26    import java.util.ArrayList;
27    import java.util.Collections;
28    import java.util.List;
29   
30    /**
31    * An adapted implementation of NCList as described in the paper
32    *
33    * <pre>
34    * Nested Containment List (NCList): a new algorithm for accelerating
35    * interval query of genome alignment and interval databases
36    * - Alexander V. Alekseyenko, Christopher J. Lee
37    * https://doi.org/10.1093/bioinformatics/btl647
38    * </pre>
39    */
 
40    public class NCList<T extends ContiguousI>
41    {
42    /*
43    * the number of ranges represented
44    */
45    private int size;
46   
47    /*
48    * a list, in start position order, of sublists of ranges ordered so
49    * that each contains (or is the same as) the one that follows it
50    */
51    private List<NCNode<T>> subranges;
52   
53    /**
54    * Constructor given a list of things that are each located on a contiguous
55    * interval. Note that the constructor may reorder the list.
56    * <p>
57    * We assume here that for each range, start &lt;= end. Behaviour for reverse
58    * ordered ranges is undefined.
59    *
60    * @param ranges
61    */
 
62  34 toggle public NCList(List<T> ranges)
63    {
64  34 this();
65  34 build(ranges);
66    }
67   
68    /**
69    * Sort and group ranges into sublists where each sublist represents a region
70    * and its contained subregions
71    *
72    * @param ranges
73    */
 
74  34 toggle protected void build(List<T> ranges)
75    {
76    /*
77    * sort by start ascending so that contained intervals
78    * follow their containing interval
79    */
80  34 Collections.sort(ranges, RangeComparator.BY_START_POSITION);
81   
82  34 List<Range> sublists = buildSubranges(ranges);
83   
84    /*
85    * convert each subrange to an NCNode consisting of a range and
86    * (possibly) its contained NCList
87    */
88  34 for (Range sublist : sublists)
89    {
90  44 subranges.add(new NCNode<T>(ranges.subList(sublist.start,
91    sublist.end + 1)));
92    }
93   
94  34 size = ranges.size();
95    }
96   
 
97  280 toggle public NCList(T entry)
98    {
99  280 this();
100  280 subranges.add(new NCNode<>(entry));
101  280 size = 1;
102    }
103   
 
104  363 toggle public NCList()
105    {
106  363 subranges = new ArrayList<NCNode<T>>();
107    }
108   
109    /**
110    * Traverses the sorted ranges to identify sublists, within which each
111    * interval contains the one that follows it
112    *
113    * @param ranges
114    * @return
115    */
 
116  34 toggle protected List<Range> buildSubranges(List<T> ranges)
117    {
118  34 List<Range> sublists = new ArrayList<>();
119   
120  34 if (ranges.isEmpty())
121    {
122  1 return sublists;
123    }
124   
125  33 int listStartIndex = 0;
126  33 long lastEndPos = Long.MAX_VALUE;
127   
128  112 for (int i = 0; i < ranges.size(); i++)
129    {
130  79 ContiguousI nextInterval = ranges.get(i);
131  79 long nextStart = nextInterval.getBegin();
132  79 long nextEnd = nextInterval.getEnd();
133  79 if (nextStart > lastEndPos || nextEnd > lastEndPos)
134    {
135    /*
136    * this interval is not contained in the preceding one
137    * close off the last sublist
138    */
139  11 sublists.add(new Range(listStartIndex, i - 1));
140  11 listStartIndex = i;
141    }
142  79 lastEndPos = nextEnd;
143    }
144   
145  33 sublists.add(new Range(listStartIndex, ranges.size() - 1));
146  33 return sublists;
147    }
148   
149    /**
150    * Adds one entry to the stored set (with duplicates allowed)
151    *
152    * @param entry
153    */
 
154  307 toggle public void add(T entry)
155    {
156  307 add(entry, true);
157    }
158   
159    /**
160    * Adds one entry to the stored set, and returns true, unless allowDuplicates
161    * is set to false and it is already contained (by object equality test), in
162    * which case it is not added and this method returns false.
163    *
164    * @param entry
165    * @param allowDuplicates
166    * @return
167    */
 
168  482 toggle public synchronized boolean add(T entry, boolean allowDuplicates)
169    {
170  482 if (!allowDuplicates && contains(entry))
171    {
172  9 return false;
173    }
174   
175  473 size++;
176  473 long start = entry.getBegin();
177  473 long end = entry.getEnd();
178   
179    /*
180    * cases:
181    * - precedes all subranges: add as NCNode on front of list
182    * - follows all subranges: add as NCNode on end of list
183    * - enclosed by a subrange - add recursively to subrange
184    * - encloses one or more subranges - push them inside it
185    * - none of the above - add as a new node and resort nodes list (?)
186    */
187   
188    /*
189    * find the first subrange whose end does not precede entry's start
190    */
191  473 int candidateIndex = findFirstOverlap(start);
192  473 if (candidateIndex == -1)
193    {
194    /*
195    * all subranges precede this one - add it on the end
196    */
197  19 subranges.add(new NCNode<>(entry));
198  19 return true;
199    }
200   
201    /*
202    * search for maximal span of subranges i-k that the new entry
203    * encloses; or a subrange that encloses the new entry
204    */
205  454 boolean enclosing = false;
206  454 int firstEnclosed = 0;
207  454 int lastEnclosed = 0;
208  454 boolean overlapping = false;
209   
210  657 for (int j = candidateIndex; j < subranges.size(); j++)
211    {
212  588 NCNode<T> subrange = subranges.get(j);
213   
214  588 if (end < subrange.getBegin() && !overlapping && !enclosing)
215    {
216    /*
217    * new entry lies between subranges j-1 j
218    */
219  2 subranges.add(j, new NCNode<>(entry));
220  2 return true;
221    }
222   
223  586 if (subrange.getBegin() <= start && subrange.getEnd() >= end)
224    {
225    /*
226    * push new entry inside this subrange as it encloses it
227    */
228  352 subrange.add(entry);
229  352 return true;
230    }
231   
232  234 if (start <= subrange.getBegin())
233    {
234  91 if (end >= subrange.getEnd())
235    {
236    /*
237    * new entry encloses this subrange (and possibly preceding ones);
238    * continue to find the maximal list it encloses
239    */
240  60 if (!enclosing)
241    {
242  44 firstEnclosed = j;
243    }
244  60 lastEnclosed = j;
245  60 enclosing = true;
246  60 continue;
247    }
248    else
249    {
250    /*
251    * entry spans from before this subrange to inside it
252    */
253  31 if (enclosing)
254    {
255    /*
256    * entry encloses one or more preceding subranges
257    */
258  10 addEnclosingRange(entry, firstEnclosed, lastEnclosed);
259  10 return true;
260    }
261    else
262    {
263    /*
264    * entry spans two subranges but doesn't enclose any
265    * so just add it
266    */
267  21 subranges.add(j, new NCNode<>(entry));
268  21 return true;
269    }
270    }
271    }
272    else
273    {
274  143 overlapping = true;
275    }
276    }
277   
278    /*
279    * drops through to here if new range encloses all others
280    * or overlaps the last one
281    */
282  69 if (enclosing)
283    {
284  34 addEnclosingRange(entry, firstEnclosed, lastEnclosed);
285    }
286    else
287    {
288  35 subranges.add(new NCNode<>(entry));
289    }
290   
291  69 return true;
292    }
293   
294    /**
295    * Answers true if this NCList contains the given entry (by object equality
296    * test), else false
297    *
298    * @param entry
299    * @return
300    */
 
301  21843 toggle public boolean contains(T entry)
302    {
303    /*
304    * find the first sublist that might overlap, i.e.
305    * the first whose end position is >= from
306    */
307  21843 int candidateIndex = findFirstOverlap(entry.getBegin());
308   
309  21843 if (candidateIndex == -1)
310    {
311  805 return false;
312    }
313   
314  21038 int to = entry.getEnd();
315   
316  43921 for (int i = candidateIndex; i < subranges.size(); i++)
317    {
318  31176 NCNode<T> candidate = subranges.get(i);
319  31176 if (candidate.getBegin() > to)
320    {
321    /*
322    * we are past the end of our target range
323    */
324  547 break;
325    }
326  30629 if (candidate.contains(entry))
327    {
328  7746 return true;
329    }
330    }
331  13292 return false;
332    }
333   
334    /**
335    * Update the tree so that the range of the new entry encloses subranges i to
336    * j (inclusive). That is, replace subranges i-j (inclusive) with a new
337    * subrange that contains them.
338    *
339    * @param entry
340    * @param i
341    * @param j
342    */
 
343  44 toggle protected synchronized void addEnclosingRange(T entry, final int i,
344    final int j)
345    {
346  44 NCList<T> newNCList = new NCList<>();
347  44 newNCList.addNodes(subranges.subList(i, j + 1));
348  44 NCNode<T> newNode = new NCNode<>(entry, newNCList);
349  104 for (int k = j; k >= i; k--)
350    {
351  60 subranges.remove(k);
352    }
353  44 subranges.add(i, newNode);
354    }
355   
 
356  44 toggle protected void addNodes(List<NCNode<T>> nodes)
357    {
358  44 for (NCNode<T> node : nodes)
359    {
360  60 subranges.add(node);
361  60 size += node.size();
362    }
363    }
364   
365    /**
366    * Returns a (possibly empty) list of items whose extent overlaps the given
367    * range
368    *
369    * @param from
370    * start of overlap range (inclusive)
371    * @param to
372    * end of overlap range (inclusive)
373    * @return
374    */
 
375  61 toggle public List<T> findOverlaps(long from, long to)
376    {
377  61 List<T> result = new ArrayList<>();
378   
379  61 findOverlaps(from, to, result);
380   
381  61 return result;
382    }
383   
384    /**
385    * Recursively searches the NCList adding any items that overlap the from-to
386    * range to the result list
387    *
388    * @param from
389    * @param to
390    * @param result
391    */
 
392  765 toggle protected void findOverlaps(long from, long to, List<T> result)
393    {
394    /*
395    * find the first sublist that might overlap, i.e.
396    * the first whose end position is >= from
397    */
398  765 int candidateIndex = findFirstOverlap(from);
399   
400  765 if (candidateIndex == -1)
401    {
402  25 return;
403    }
404   
405  1802 for (int i = candidateIndex; i < subranges.size(); i++)
406    {
407  1128 NCNode<T> candidate = subranges.get(i);
408  1128 if (candidate.getBegin() > to)
409    {
410    /*
411    * we are past the end of our target range
412    */
413  66 break;
414    }
415  1062 candidate.findOverlaps(from, to, result);
416    }
417   
418    }
419   
420    /**
421    * Search subranges for the first one whose end position is not before the
422    * target range's start position, i.e. the first one that may overlap the
423    * target range. Returns the index in the list of the first such range found,
424    * or -1 if none found.
425    *
426    * @param from
427    * @return
428    */
 
429  23081 toggle protected int findFirstOverlap(long from)
430    {
431    /*
432    * The NCList paper describes binary search for this step,
433    * but this not implemented here as (a) I haven't understood it yet
434    * and (b) it seems to imply complications for adding to an NCList
435    */
436   
437  23081 int i = 0;
438  23081 if (subranges != null)
439    {
440  23081 for (NCNode<T> subrange : subranges)
441    {
442  24473 if (subrange.getEnd() >= from)
443    {
444  22232 return i;
445    }
446  2241 i++;
447    }
448    }
449  849 return -1;
450    }
451   
452    /**
453    * Formats the tree as a bracketed list e.g.
454    *
455    * <pre>
456    * [1-100 [10-30 [10-20]], 15-30 [20-20]]
457    * </pre>
458    */
 
459  45 toggle @Override
460    public String toString()
461    {
462  45 return subranges.toString();
463    }
464   
465    /**
466    * Returns a string representation of the data where containment is shown by
467    * indentation on new lines
468    *
469    * @return
470    */
 
471  2 toggle public String prettyPrint()
472    {
473  2 StringBuilder sb = new StringBuilder(512);
474  2 int offset = 0;
475  2 int indent = 2;
476  2 prettyPrint(sb, offset, indent);
477  2 sb.append(System.lineSeparator());
478  2 return sb.toString();
479    }
480   
481    /**
482    * @param sb
483    * @param offset
484    * @param indent
485    */
 
486  8 toggle void prettyPrint(StringBuilder sb, int offset, int indent)
487    {
488  8 boolean first = true;
489  8 for (NCNode<T> subrange : subranges)
490    {
491  12 if (!first)
492    {
493  4 sb.append(System.lineSeparator());
494    }
495  12 first = false;
496  12 subrange.prettyPrint(sb, offset, indent);
497    }
498    }
499   
500    /**
501    * Answers true if the data held satisfy the rules of construction of an
502    * NCList, else false.
503    *
504    * @return
505    */
 
506  223 toggle public boolean isValid()
507    {
508  223 return isValid(Integer.MIN_VALUE, Integer.MAX_VALUE);
509    }
510   
511    /**
512    * Answers true if the data held satisfy the rules of construction of an
513    * NCList bounded within the given start-end range, else false.
514    * <p>
515    * Each subrange must lie within start-end (inclusive). Subranges must be
516    * ordered by start position ascending.
517    * <p>
518    *
519    * @param start
520    * @param end
521    * @return
522    */
 
523  2776 toggle boolean isValid(final int start, final int end)
524    {
525  2776 int lastStart = start;
526  2776 for (NCNode<T> subrange : subranges)
527    {
528  4538 if (subrange.getBegin() < lastStart)
529    {
530  5 System.err.println("error in NCList: range " + subrange.toString()
531    + " starts before " + lastStart);
532  5 return false;
533    }
534  4533 if (subrange.getEnd() > end)
535    {
536  1 System.err.println("error in NCList: range " + subrange.toString()
537    + " ends after " + end);
538  1 return false;
539    }
540  4532 lastStart = subrange.getBegin();
541   
542  4532 if (!subrange.isValid())
543    {
544  7 return false;
545    }
546    }
547  2763 return true;
548    }
549   
550    /**
551    * Answers the lowest start position enclosed by the ranges
552    *
553    * @return
554    */
 
555  1 toggle public int getStart()
556    {
557  1 return subranges.isEmpty() ? 0 : subranges.get(0).getBegin();
558    }
559   
560    /**
561    * Returns the number of ranges held (deep count)
562    *
563    * @return
564    */
 
565  438 toggle public int size()
566    {
567  438 return size;
568    }
569   
570    /**
571    * Returns a list of all entries stored
572    *
573    * @return
574    */
 
575  330 toggle public List<T> getEntries()
576    {
577  330 List<T> result = new ArrayList<>();
578  330 getEntries(result);
579  330 return result;
580    }
581   
582    /**
583    * Adds all contained entries to the given list
584    *
585    * @param result
586    */
 
587  523 toggle void getEntries(List<T> result)
588    {
589  523 for (NCNode<T> subrange : subranges)
590    {
591  613 subrange.getEntries(result);
592    }
593    }
594   
595    /**
596    * Deletes the given entry from the store, returning true if it was found (and
597    * deleted), else false. This method makes no assumption that the entry is in
598    * the 'expected' place in the store, in case it has been modified since it
599    * was added. Only the first 'same object' match is deleted, not 'equal' or
600    * multiple objects.
601    *
602    * @param entry
603    */
 
604  881 toggle public synchronized boolean delete(T entry)
605    {
606  881 if (entry == null)
607    {
608  1 return false;
609    }
610  1911 for (int i = 0; i < subranges.size(); i++)
611    {
612  1344 NCNode<T> subrange = subranges.get(i);
613  1344 NCList<T> subRegions = subrange.getSubRegions();
614   
615  1344 if (subrange.getRegion() == entry)
616    {
617    /*
618    * if the subrange is rooted on this entry, promote its
619    * subregions (if any) to replace the subrange here;
620    * NB have to resort subranges after doing this since e.g.
621    * [10-30 [12-20 [16-18], 13-19]]
622    * after deleting 12-20, 16-18 is promoted to sibling of 13-19
623    * but should follow it in the list of subranges of 10-30
624    */
625  124 subranges.remove(i);
626  124 if (subRegions != null)
627    {
628  60 subranges.addAll(subRegions.subranges);
629  60 Collections.sort(subranges, RangeComparator.BY_START_POSITION);
630    }
631  124 size--;
632  124 return true;
633    }
634    else
635    {
636  1220 if (subRegions != null && subRegions.delete(entry))
637    {
638  189 size--;
639  189 subrange.deleteSubRegionsIfEmpty();
640  189 return true;
641    }
642    }
643    }
644  567 return false;
645    }
646    }