Clover icon

Coverage Report

  1. Project Clover database Mon Nov 11 2024 17:27:16 GMT
  2. Package jalview.datamodel

File SeqCigar.java

 

Coverage histogram

../../img/srcFileCovDistChart7.png
29% of files have more coverage

Code metrics

120
189
19
1
677
435
91
0.48
9.95
19
4.79

Classes

Class Line # Actions
SeqCigar 31 189 91
0.704268370.4%
 

Contributing tests

This file is covered by 19 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.datamodel;
22   
23    import jalview.analysis.AlignSeq;
24    import jalview.analysis.SeqsetUtils;
25    import jalview.util.MessageManager;
26    import jalview.util.ShiftList;
27   
28    import java.util.Enumeration;
29    import java.util.Hashtable;
30   
 
31    public class SeqCigar extends CigarSimple
32    {
33    /**
34    * start(inclusive) and end(exclusive) of subsequence on refseq
35    */
36    private int start, end;
37   
38    private SequenceI refseq = null;
39   
40    private Hashtable seqProps;
41   
42    /**
43    * Reference dataset sequence for the cigar string
44    *
45    * @return SequenceI
46    */
 
47  104 toggle public SequenceI getRefSeq()
48    {
49  104 return refseq;
50    }
51   
52    /**
53    *
54    * @return int start index of cigar ops on refSeq
55    */
 
56  0 toggle public int getStart()
57    {
58  0 return start;
59    }
60   
61    /**
62    *
63    * @return int end index (exclusive) of cigar ops on refSeq
64    */
 
65  0 toggle public int getEnd()
66    {
67  0 return end;
68    }
69   
70    /**
71    *
72    * @param column
73    * @return position in sequence for column (or -1 if no match state exists)
74    */
 
75  133 toggle public int findPosition(int column)
76    {
77  133 int w = 0, ew, p = refseq.findPosition(start);
78  133 if (column < 0)
79    {
80  0 return -1;
81    }
82  133 if (range != null)
83    {
84  215 for (int i = 0; i < length; i++)
85    {
86  215 if (operation[i] == M || operation[i] == D)
87    {
88  163 p += range[i];
89    }
90  215 if (operation[i] == M || operation[i] == I)
91    {
92  215 ew = w + range[i];
93  215 if (column < ew)
94    {
95  133 if (operation[i] == I)
96    {
97  22 return -1;
98    }
99  111 return p - (ew - column);
100    }
101  82 w = ew;
102    }
103    }
104    }
105  0 return -1;
106    }
107   
108    /**
109    * Returns sequence as a string with cigar operations applied to it
110    *
111    * @return String
112    */
 
113  26 toggle @Override
114    public String getSequenceString(char GapChar)
115    {
116  26 return (length == 0) ? ""
117    : (String) getSequenceAndDeletions(
118    refseq.getSequenceAsString(start, end), GapChar)[0];
119    }
120   
121    /**
122    * recreates a gapped and edited version of RefSeq or null for an empty cigar
123    * string
124    *
125    * @return SequenceI
126    */
 
127  172 toggle public SequenceI getSeq(char GapChar)
128    {
129  172 Sequence seq;
130  172 if (refseq == null || length == 0)
131    {
132  0 return null;
133    }
134  172 Object[] edit_result = getSequenceAndDeletions(
135    refseq.getSequenceAsString(start, end), GapChar);
136  172 if (edit_result == null)
137    {
138  0 throw new Error(MessageManager.getString(
139    "error.implementation_error_unexpected_null_from_get_sequence_and_deletions"));
140    }
141  172 int bounds[] = (int[]) edit_result[1];
142  172 seq = new Sequence(refseq.getName(), (String) edit_result[0],
143    refseq.getStart() + start + bounds[0], refseq.getStart() + start
144  172 + ((bounds[2] == 0) ? -1 : bounds[2]));
145  172 seq.setDescription(refseq.getDescription());
146  172 int sstart = seq.getStart(), send = seq.getEnd();
147    // seq.checkValidRange(); probably not needed
148    // recover local properties if present
149  172 if (seqProps != null)
150    {
151    // this recovers dataset sequence reference as well as local features,
152    // names, start/end settings.
153  172 SeqsetUtils.SeqCharacterUnhash(seq, seqProps);
154    }
155    // ensure dataset sequence is up to date from local reference
156  172 seq.setDatasetSequence(refseq);
157  172 seq.setStart(sstart);
158  172 seq.setEnd(send);
159  172 return seq;
160    }
161   
162    /*
163    * We don't allow this - refseq is given at construction time only public void
164    * setSeq(SequenceI seq) { this.seq = seq; }
165    */
166    /**
167    * internal constructor - sets seq to a gapless sequence derived from seq and
168    * prepends any 'D' operations needed to get to the first residue of seq.
169    *
170    * @param seq
171    * SequenceI
172    * @param initialDeletion
173    * true to mark initial dataset sequence residues as deleted in
174    * subsequence
175    * @param _s
176    * index of first position in seq
177    * @param _e
178    * index after last position in (possibly gapped) seq
179    * @return true if gaps are present in seq
180    */
 
181  136 toggle private boolean _setSeq(SequenceI seq, boolean initialDeletion, int _s,
182    int _e)
183    {
184  136 boolean hasgaps = false;
185  136 if (seq == null)
186    {
187  0 throw new Error(MessageManager
188    .getString("error.implementation_error_set_seq_null"));
189    }
190  136 if (_s < 0)
191    {
192  0 throw new Error(MessageManager
193    .formatMessage("error.implementation_error_s", new String[]
194    { Integer.valueOf(_s).toString() }));
195    }
196  136 String seq_string = seq.getSequenceAsString();
197  136 if (_e == 0 || _e < _s || _e > seq_string.length())
198    {
199  10 _e = seq_string.length();
200    }
201    // resolve start and end positions relative to ungapped reference sequence
202  136 start = seq.findPosition(_s) - seq.getStart();
203  136 end = seq.findPosition(_e) - seq.getStart();
204  136 int l_ungapped = end - start;
205    // Find correct sequence to reference and correct start and end - if
206    // necessary
207  136 SequenceI ds = seq.getDatasetSequence();
208  136 if (ds == null)
209    {
210    // make a new dataset sequence
211  17 String ungapped = AlignSeq.extractGaps(
212    jalview.util.Comparison.GapChars, new String(seq_string));
213  17 l_ungapped = ungapped.length();
214    // check that we haven't just duplicated an ungapped sequence.
215  17 if (l_ungapped == seq.getLength())
216    {
217  16 ds = seq;
218    }
219    else
220    {
221  1 ds = new Sequence(seq.getName(), ungapped, seq.getStart(),
222    seq.getStart() + ungapped.length() - 1);
223    // JBPNote: this would be consistent but may not be useful
224    // seq.setDatasetSequence(ds);
225    }
226    }
227    // add in offset between seq and the dataset sequence
228  136 if (ds.getStart() < seq.getStart())
229    {
230  14 int offset = seq.getStart() - ds.getStart();
231  14 if (initialDeletion)
232    {
233    // absolute cigar string
234  0 addDeleted(_s + offset);
235  0 start = 0;
236  0 end += offset;
237    }
238    else
239    {
240    // normal behaviour - just mark start and end subsequence
241  14 start += offset;
242  14 end += offset;
243   
244    }
245   
246    }
247   
248    // any gaps to process ?
249  136 if (l_ungapped != (_e - _s))
250    {
251  106 hasgaps = true;
252    }
253   
254  136 refseq = ds;
255    // copy over local properties for the sequence instance of the refseq
256  136 seqProps = SeqsetUtils.SeqCharacterHash(seq);
257    // Check offsets
258  136 if (end > ds.getLength())
259    {
260  0 throw new Error(MessageManager
261    .getString("error.implementation_error_seqcigar_possible"));
262    // end = ds.getLength();
263    }
264   
265  136 return hasgaps;
266    }
267   
268    /**
269    * directly initialise a cigar object with a sequence of range, operation
270    * pairs and a sequence to apply it to. operation and range should be relative
271    * to the seq.getStart()'th residue of the dataset seq resolved from seq.
272    *
273    * @param seq
274    * SequenceI
275    * @param operation
276    * char[]
277    * @param range
278    * int[]
279    */
 
280  1 toggle public SeqCigar(SequenceI seq, char operation[], int range[])
281    {
282  1 super();
283  1 if (seq == null)
284    {
285  0 throw new Error(
286    MessageManager.getString("error.implmentation_bug_seq_null"));
287    }
288  1 if (operation.length != range.length)
289    {
290  0 throw new Error(MessageManager.getString(
291    "error.implementation_bug_cigar_operation_list_range_list"));
292    }
293   
294  1 if (operation != null)
295    {
296  1 this.operation = new char[operation.length + _inc_length];
297  1 this.range = new int[operation.length + _inc_length];
298   
299  1 if (_setSeq(seq, false, 0, 0))
300    {
301  0 throw new Error(MessageManager.getString(
302    "error.not_yet_implemented_cigar_object_from_cigar_string"));
303    }
304  11 for (int i = this.length, j = 0; j < operation.length; i++, j++)
305    {
306  10 char op = operation[j];
307  10 if (op != M && op != I && op != D)
308    {
309  0 throw new Error(MessageManager.formatMessage(
310    "error.implementation_bug_cigar_operation", new String[]
311    { Integer.valueOf(j).toString(),
312    Integer.valueOf(op).toString(),
313    Integer.valueOf(M).toString(),
314    Integer.valueOf(I).toString(),
315    Integer.valueOf(D).toString() }));
316    }
317  10 this.operation[i] = op;
318  10 this.range[i] = range[j];
319    }
320  1 this.length += operation.length;
321    }
322    else
323    {
324  0 this.operation = null;
325  0 this.range = null;
326  0 this.length = 0;
327  0 if (_setSeq(seq, false, 0, 0))
328    {
329  0 throw new Error(MessageManager.getString(
330    "error.not_yet_implemented_cigar_object_from_cigar_string"));
331    }
332    }
333    }
334   
335    /**
336    * add range matched residues to cigar string
337    *
338    * @param range
339    * int
340    */
 
341  0 toggle public void addMatch(int range)
342    {
343  0 this.addOperation(M, range);
344    }
345   
346    /**
347    * Adds insertion and match operations based on seq to the cigar up to the
348    * endpos column of seq.
349    *
350    * @param cigar
351    * CigarBase
352    * @param seq
353    * SequenceI
354    * @param startpos
355    * int
356    * @param endpos
357    * int
358    * @param initialDeletions
359    * if true then initial deletions will be added from start of seq to
360    * startpos
361    */
 
362  135 toggle protected static void addSequenceOps(CigarBase cigar, SequenceI seq,
363    int startpos, int endpos, boolean initialDeletions)
364    {
365  135 char op = '\0';
366  135 int range = 0;
367  135 int p = 0, res = seq.getLength();
368   
369  135 if (!initialDeletions)
370    {
371  135 p = startpos;
372    }
373   
374  13220 while (p <= endpos)
375    {
376  13085 boolean isGap = (p < res)
377    ? jalview.util.Comparison.isGap(seq.getCharAt(p))
378    : true;
379  13085 if ((startpos <= p) && (p <= endpos))
380    {
381  13085 if (isGap)
382    {
383  1076 if (range > 0 && op != I)
384    {
385  243 cigar.addOperation(op, range);
386  243 range = 0;
387    }
388  1076 op = I;
389  1076 range++;
390    }
391    else
392    {
393  12009 if (range > 0 && op != M)
394    {
395  223 cigar.addOperation(op, range);
396  223 range = 0;
397    }
398  12009 op = M;
399  12009 range++;
400    }
401    }
402    else
403    {
404  0 if (!isGap)
405    {
406  0 if (range > 0 && op != D)
407    {
408  0 cigar.addOperation(op, range);
409  0 range = 0;
410    }
411  0 op = D;
412  0 range++;
413    }
414    else
415    {
416    // do nothing - insertions are not made in flanking regions
417    }
418    }
419  13085 p++;
420    }
421  135 if (range > 0)
422    {
423  135 cigar.addOperation(op, range);
424    }
425    }
426   
427    /**
428    * create a cigar string for given sequence
429    *
430    * @param seq
431    * SequenceI
432    */
 
433  9 toggle public SeqCigar(SequenceI seq)
434    {
435  9 super();
436  9 if (seq == null)
437    {
438  0 throw new Error(MessageManager
439    .getString("error.implementation_error_for_new_cigar"));
440    }
441  9 _setSeq(seq, false, 0, 0);
442    // there is still work to do
443  9 addSequenceOps(this, seq, 0, seq.getLength() - 1, false);
444    }
445   
446    /**
447    * Create Cigar from a range of gaps and residues on a sequence object
448    *
449    * @param seq
450    * SequenceI
451    * @param start
452    * int - first column in range
453    * @param end
454    * int - last column in range
455    */
 
456  126 toggle public SeqCigar(SequenceI seq, int start, int end)
457    {
458  126 super();
459  126 if (seq == null)
460    {
461  0 throw new Error(MessageManager
462    .getString("error.implementation_error_for_new_cigar"));
463    }
464  126 _setSeq(seq, false, start, end + 1);
465    // there is still work to do
466  126 addSequenceOps(this, seq, start, end, false);
467    }
468   
469    /**
470    * Create a cigar object from a cigar string like '[<I|D|M><range>]+' Will
471    * fail if the given seq already contains gaps (JBPNote: future implementation
472    * will fix)
473    *
474    * @param seq
475    * SequenceI object resolvable to a dataset sequence
476    * @param cigarString
477    * String
478    * @return Cigar
479    */
 
480  1 toggle public static SeqCigar parseCigar(SequenceI seq, String cigarString)
481    throws Exception
482    {
483  1 Object[] opsandrange = parseCigarString(cigarString);
484  1 return new SeqCigar(seq, (char[]) opsandrange[0],
485    (int[]) opsandrange[1]);
486    }
487   
488    /**
489    * create an alignment from the given array of cigar sequences and gap
490    * character, and marking the given segments as visible in the given
491    * hiddenColumns.
492    *
493    * @param alseqs
494    * @param gapCharacter
495    * @param hidden
496    * - hiddenColumns where hidden regions are marked
497    * @param segments
498    * - visible regions of alignment
499    * @return SequenceI[]
500    */
 
501  5 toggle public static SequenceI[] createAlignmentSequences(SeqCigar[] alseqs,
502    char gapCharacter, HiddenColumns hidden, int[] segments)
503    {
504  5 SequenceI[] seqs = new SequenceI[alseqs.length];
505  5 StringBuffer[] g_seqs = new StringBuffer[alseqs.length];
506  5 String[] alseqs_string = new String[alseqs.length];
507  5 Object[] gs_regions = new Object[alseqs.length];
508  15 for (int i = 0; i < alseqs.length; i++)
509    {
510  10 alseqs_string[i] = alseqs[i].getRefSeq()
511    .getSequenceAsString(alseqs[i].start, alseqs[i].end);
512  10 gs_regions[i] = alseqs[i].getSequenceAndDeletions(alseqs_string[i],
513    gapCharacter); // gapped sequence, {start, start col, end.
514    // endcol}, hidden regions {{start, end, col}})
515  10 if (gs_regions[i] == null)
516    {
517  0 throw new Error(MessageManager.formatMessage(
518    "error.implementation_error_cigar_seq_no_operations",
519    new String[]
520    { Integer.valueOf(i).toString() }));
521    }
522  10 g_seqs[i] = new StringBuffer((String) ((Object[]) gs_regions[i])[0]); // the
523    // visible
524    // gapped
525    // sequence
526    }
527    // Now account for insertions. (well - deletions)
528    // this is complicated because we must keep track of shifted positions in
529    // each sequence
530  5 ShiftList shifts = new ShiftList();
531  15 for (int i = 0; i < alseqs.length; i++)
532    {
533  10 Object[] gs_region = ((Object[]) ((Object[]) gs_regions[i])[2]);
534  10 if (gs_region != null)
535   
536    {
537  2 for (int hr = 0; hr < gs_region.length; hr++)
538    {
539  1 int[] region = (int[]) gs_region[hr];
540  1 char[] insert = new char[region[1] - region[0] + 1];
541  7 for (int s = 0; s < insert.length; s++)
542    {
543  6 insert[s] = gapCharacter;
544    }
545  1 int inspos = shifts.shift(region[2]); // resolve insertion position in
546    // current alignment frame of
547    // reference
548  4 for (int s = 0; s < alseqs.length; s++)
549    {
550  3 if (s != i)
551    {
552  2 if (g_seqs[s].length() <= inspos)
553    {
554    // prefix insertion with more gaps.
555  0 for (int l = inspos - g_seqs[s].length(); l > 0; l--)
556    {
557  0 g_seqs[s].append(gapCharacter); // to debug - use a diffferent
558    // gap character here
559    }
560    }
561  2 g_seqs[s].insert(inspos, insert);
562    }
563    else
564    {
565  1 g_seqs[s].insert(inspos,
566    alseqs_string[i].substring(region[0], region[1] + 1));
567    }
568    }
569  1 shifts.addShift(region[2], insert.length); // update shift in
570    // alignment frame of
571    // reference
572  1 if (segments == null)
573    {
574    // add a hidden column for this deletion
575  1 hidden.hideColumns(inspos, inspos + insert.length - 1);
576    }
577    }
578    }
579    }
580  15 for (int i = 0; i < alseqs.length; i++)
581    {
582  10 int[] bounds = ((int[]) ((Object[]) gs_regions[i])[1]);
583  10 SequenceI ref = alseqs[i].getRefSeq();
584  10 seqs[i] = new Sequence(ref.getName(), g_seqs[i].toString(),
585    ref.getStart() + alseqs[i].start + bounds[0],
586    ref.getStart() + alseqs[i].start
587  10 + (bounds[2] == 0 ? -1 : bounds[2]));
588  10 seqs[i].setDatasetSequence(ref);
589  10 seqs[i].setDescription(ref.getDescription());
590    }
591  5 if (segments != null)
592    {
593  4 for (int i = 0; i < segments.length; i += 3)
594    {
595    // int start=shifts.shift(segments[i]-1)+1;
596    // int end=shifts.shift(segments[i]+segments[i+1]-1)-1;
597  2 hidden.hideColumns(segments[i + 1],
598    segments[i + 1] + segments[i + 2] - 1);
599    }
600    }
601  5 return seqs;
602    }
603   
604    /**
605    * references to entities that this sequence cigar is associated with.
606    */
607    private Hashtable selGroups = null;
608   
 
609  33 toggle public void setGroupMembership(Object group)
610    {
611  33 if (selGroups == null)
612    {
613  33 selGroups = new Hashtable();
614    }
615  33 selGroups.put(group, new int[0]);
616    }
617   
618    /**
619    * Test for and if present remove association to group.
620    *
621    * @param group
622    * @return true if group was associated and it was removed
623    */
 
624  0 toggle public boolean removeGroupMembership(Object group)
625    {
626  0 if (selGroups != null && selGroups.containsKey(group))
627    {
628  0 selGroups.remove(group);
629  0 return true;
630    }
631  0 return false;
632    }
633   
634    /**
635    * forget all associations for this sequence.
636    */
 
637  0 toggle public void clearMemberships()
638    {
639  0 if (selGroups != null)
640    {
641  0 selGroups.clear();
642    }
643  0 selGroups = null;
644    }
645   
646    /**
647    *
648    * @return null or array of all associated entities
649    */
 
650  0 toggle public Object[] getAllMemberships()
651    {
652  0 if (selGroups == null)
653    {
654  0 return null;
655    }
656  0 Object[] mmbs = new Object[selGroups.size()];
657  0 Enumeration en = selGroups.keys();
658  0 for (int i = 0; en.hasMoreElements(); i++)
659    {
660  0 mmbs[i] = en.nextElement();
661    }
662  0 return mmbs;
663    }
664   
665    /**
666    * Test for group membership
667    *
668    * @param sgr
669    * - a selection group or some other object that may be associated
670    * with seqCigar
671    * @return true if sgr is associated with this seqCigar
672    */
 
673  363 toggle public boolean isMemberOf(Object sgr)
674    {
675  363 return (selGroups != null) && selGroups.get(sgr) != null;
676    }
677    }