Clover icon

Coverage Report

  1. Project Clover database Thu Dec 4 2025 16:11:35 GMT
  2. Package jalview.datamodel

File SeqCigar.java

 

Coverage histogram

../../img/srcFileCovDistChart8.png
20% of files have more coverage

Code metrics

120
189
19
1
678
436
91
0.48
9.95
19
4.79

Classes

Class Line # Actions
SeqCigar 32 189 91
0.716463471.6%
 

Contributing tests

This file is covered by 73 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.datamodel;
22   
23    import jalview.analysis.AlignSeq;
24    import jalview.analysis.SeqsetUtils;
25    import jalview.analysis.SeqsetUtils.SequenceInfo;
26    import jalview.util.MessageManager;
27    import jalview.util.ShiftList;
28   
29    import java.util.Enumeration;
30    import java.util.Hashtable;
31   
 
32    public class SeqCigar extends CigarSimple
33    {
34    /**
35    * start(inclusive) and end(exclusive) of subsequence on refseq
36    */
37    private int start, end;
38   
39    private SequenceI refseq = null;
40   
41    private SequenceInfo seqProps;
42   
43    /**
44    * Reference dataset sequence for the cigar string
45    *
46    * @return SequenceI
47    */
 
48  232 toggle public SequenceI getRefSeq()
49    {
50  232 return refseq;
51    }
52   
53    /**
54    *
55    * @return int start index of cigar ops on refSeq
56    */
 
57  0 toggle public int getStart()
58    {
59  0 return start;
60    }
61   
62    /**
63    *
64    * @return int end index (exclusive) of cigar ops on refSeq
65    */
 
66  0 toggle public int getEnd()
67    {
68  0 return end;
69    }
70   
71    /**
72    *
73    * @param column
74    * @return position in sequence for column (or -1 if no match state exists)
75    */
 
76  2923 toggle public int findPosition(int column)
77    {
78  2923 int w = 0, ew, p = refseq.findPosition(start);
79  2923 if (column < 0)
80    {
81  0 return -1;
82    }
83  2923 if (range != null)
84    {
85  15035 for (int i = 0; i < length; i++)
86    {
87  15035 if (operation[i] == M || operation[i] == D)
88    {
89  8664 p += range[i];
90    }
91  15035 if (operation[i] == M || operation[i] == I)
92    {
93  15035 ew = w + range[i];
94  15035 if (column < ew)
95    {
96  2923 if (operation[i] == I)
97    {
98  327 return -1;
99    }
100  2596 return p - (ew - column);
101    }
102  12112 w = ew;
103    }
104    }
105    }
106  0 return -1;
107    }
108   
109    /**
110    * Returns sequence as a string with cigar operations applied to it
111    *
112    * @return String
113    */
 
114  74 toggle @Override
115    public String getSequenceString(char GapChar)
116    {
117  74 return (length == 0) ? ""
118    : (String) getSequenceAndDeletions(
119    refseq.getSequenceAsString(start, end), GapChar)[0];
120    }
121   
122    /**
123    * recreates a gapped and edited version of RefSeq or null for an empty cigar
124    * string
125    *
126    * @return SequenceI
127    */
 
128  299 toggle public SequenceI getSeq(char GapChar)
129    {
130  299 Sequence seq;
131  299 if (refseq == null || length == 0)
132    {
133  1 return null;
134    }
135  298 Object[] edit_result = getSequenceAndDeletions(
136    refseq.getSequenceAsString(start, end), GapChar);
137  298 if (edit_result == null)
138    {
139  0 throw new Error(MessageManager.getString(
140    "error.implementation_error_unexpected_null_from_get_sequence_and_deletions"));
141    }
142  298 int bounds[] = (int[]) edit_result[1];
143  298 seq = new Sequence(refseq.getName(), (String) edit_result[0],
144    refseq.getStart() + start + bounds[0], refseq.getStart() + start
145  298 + ((bounds[2] == 0) ? -1 : bounds[2]));
146  298 seq.setDescription(refseq.getDescription());
147  298 int sstart = seq.getStart(), send = seq.getEnd();
148    // seq.checkValidRange(); probably not needed
149    // recover local properties if present
150  298 if (seqProps != null)
151    {
152    // this recovers dataset sequence reference as well as local features,
153    // names, start/end settings.
154  298 SeqsetUtils.SeqCharacterUnhash(seq, seqProps);
155    }
156    // ensure dataset sequence is up to date from local reference
157  298 seq.setDatasetSequence(refseq);
158  298 seq.setStart(sstart);
159  298 seq.setEnd(send);
160  298 return seq;
161    }
162   
163    /*
164    * We don't allow this - refseq is given at construction time only public void
165    * setSeq(SequenceI seq) { this.seq = seq; }
166    */
167    /**
168    * internal constructor - sets seq to a gapless sequence derived from seq and
169    * prepends any 'D' operations needed to get to the first residue of seq.
170    *
171    * @param seq
172    * SequenceI
173    * @param initialDeletion
174    * true to mark initial dataset sequence residues as deleted in
175    * subsequence
176    * @param _s
177    * index of first position in seq
178    * @param _e
179    * index after last position in (possibly gapped) seq
180    * @return true if gaps are present in seq
181    */
 
182  276 toggle private boolean _setSeq(SequenceI seq, boolean initialDeletion, int _s,
183    int _e)
184    {
185  276 boolean hasgaps = false;
186  276 if (seq == null)
187    {
188  0 throw new Error(MessageManager
189    .getString("error.implementation_error_set_seq_null"));
190    }
191  276 if (_s < 0)
192    {
193  0 throw new Error(MessageManager
194    .formatMessage("error.implementation_error_s", new String[]
195    { Integer.valueOf(_s).toString() }));
196    }
197  276 String seq_string = seq.getSequenceAsString();
198  276 if (_e == 0 || _e < _s || _e > seq_string.length())
199    {
200  12 _e = seq_string.length();
201    }
202    // resolve start and end positions relative to ungapped reference sequence
203  276 start = seq.findPosition(_s) - seq.getStart();
204  276 end = seq.findPosition(_e) - seq.getStart();
205  276 int l_ungapped = end - start;
206    // Find correct sequence to reference and correct start and end - if
207    // necessary
208  276 SequenceI ds = seq.getDatasetSequence();
209  276 if (ds == null)
210    {
211    // make a new dataset sequence
212  134 String ungapped = AlignSeq.extractGaps(
213    jalview.util.Comparison.GapChars, new String(seq_string));
214  134 l_ungapped = ungapped.length();
215    // check that we haven't just duplicated an ungapped sequence.
216  134 if (l_ungapped == seq.getLength())
217    {
218  37 ds = seq;
219    }
220    else
221    {
222  97 ds = new Sequence(seq.getName(), ungapped, seq.getStart(),
223    seq.getStart() + ungapped.length() - 1);
224    // JBPNote: this would be consistent but may not be useful
225    // seq.setDatasetSequence(ds);
226    }
227    }
228    // add in offset between seq and the dataset sequence
229  276 if (ds.getStart() < seq.getStart())
230    {
231  14 int offset = seq.getStart() - ds.getStart();
232  14 if (initialDeletion)
233    {
234    // absolute cigar string
235  0 addDeleted(_s + offset);
236  0 start = 0;
237  0 end += offset;
238    }
239    else
240    {
241    // normal behaviour - just mark start and end subsequence
242  14 start += offset;
243  14 end += offset;
244   
245    }
246   
247    }
248   
249    // any gaps to process ?
250  276 if (l_ungapped != (_e - _s))
251    {
252  217 hasgaps = true;
253    }
254   
255  276 refseq = ds;
256    // copy over local properties for the sequence instance of the refseq
257  276 seqProps = SeqsetUtils.SeqCharacterHash(seq);
258    // Check offsets
259  276 if (end > ds.getLength())
260    {
261  0 throw new Error(MessageManager
262    .getString("error.implementation_error_seqcigar_possible"));
263    // end = ds.getLength();
264    }
265   
266  276 return hasgaps;
267    }
268   
269    /**
270    * directly initialise a cigar object with a sequence of range, operation
271    * pairs and a sequence to apply it to. operation and range should be relative
272    * to the seq.getStart()'th residue of the dataset seq resolved from seq.
273    *
274    * @param seq
275    * SequenceI
276    * @param operation
277    * char[]
278    * @param range
279    * int[]
280    */
 
281  1 toggle public SeqCigar(SequenceI seq, char operation[], int range[])
282    {
283  1 super();
284  1 if (seq == null)
285    {
286  0 throw new Error(
287    MessageManager.getString("error.implmentation_bug_seq_null"));
288    }
289  1 if (operation.length != range.length)
290    {
291  0 throw new Error(MessageManager.getString(
292    "error.implementation_bug_cigar_operation_list_range_list"));
293    }
294   
295  1 if (operation != null)
296    {
297  1 this.operation = new char[operation.length + _inc_length];
298  1 this.range = new int[operation.length + _inc_length];
299   
300  1 if (_setSeq(seq, false, 0, 0))
301    {
302  0 throw new Error(MessageManager.getString(
303    "error.not_yet_implemented_cigar_object_from_cigar_string"));
304    }
305  11 for (int i = this.length, j = 0; j < operation.length; i++, j++)
306    {
307  10 char op = operation[j];
308  10 if (op != M && op != I && op != D)
309    {
310  0 throw new Error(MessageManager.formatMessage(
311    "error.implementation_bug_cigar_operation", new String[]
312    { Integer.valueOf(j).toString(),
313    Integer.valueOf(op).toString(),
314    Integer.valueOf(M).toString(),
315    Integer.valueOf(I).toString(),
316    Integer.valueOf(D).toString() }));
317    }
318  10 this.operation[i] = op;
319  10 this.range[i] = range[j];
320    }
321  1 this.length += operation.length;
322    }
323    else
324    {
325  0 this.operation = null;
326  0 this.range = null;
327  0 this.length = 0;
328  0 if (_setSeq(seq, false, 0, 0))
329    {
330  0 throw new Error(MessageManager.getString(
331    "error.not_yet_implemented_cigar_object_from_cigar_string"));
332    }
333    }
334    }
335   
336    /**
337    * add range matched residues to cigar string
338    *
339    * @param range
340    * int
341    */
 
342  0 toggle public void addMatch(int range)
343    {
344  0 this.addOperation(M, range);
345    }
346   
347    /**
348    * Adds insertion and match operations based on seq to the cigar up to the
349    * endpos column of seq.
350    *
351    * @param cigar
352    * CigarBase
353    * @param seq
354    * SequenceI
355    * @param startpos
356    * int
357    * @param endpos
358    * int
359    * @param initialDeletions
360    * if true then initial deletions will be added from start of seq to
361    * startpos
362    */
 
363  275 toggle protected static void addSequenceOps(CigarBase cigar, SequenceI seq,
364    int startpos, int endpos, boolean initialDeletions)
365    {
366  275 char op = '\0';
367  275 int range = 0;
368  275 int p = 0, res = seq.getLength();
369   
370  275 if (!initialDeletions)
371    {
372  275 p = startpos;
373    }
374   
375  17465 while (p <= endpos)
376    {
377  17190 boolean isGap = (p < res)
378    ? jalview.util.Comparison.isGap(seq.getCharAt(p))
379    : true;
380  17190 if ((startpos <= p) && (p <= endpos))
381    {
382  17190 if (isGap)
383    {
384  2035 if (range > 0 && op != I)
385    {
386  463 cigar.addOperation(op, range);
387  463 range = 0;
388    }
389  2035 op = I;
390  2035 range++;
391    }
392    else
393    {
394  15155 if (range > 0 && op != M)
395    {
396  419 cigar.addOperation(op, range);
397  419 range = 0;
398    }
399  15155 op = M;
400  15155 range++;
401    }
402    }
403    else
404    {
405  0 if (!isGap)
406    {
407  0 if (range > 0 && op != D)
408    {
409  0 cigar.addOperation(op, range);
410  0 range = 0;
411    }
412  0 op = D;
413  0 range++;
414    }
415    else
416    {
417    // do nothing - insertions are not made in flanking regions
418    }
419    }
420  17190 p++;
421    }
422  275 if (range > 0)
423    {
424  274 cigar.addOperation(op, range);
425    }
426    }
427   
428    /**
429    * create a cigar string for given sequence
430    *
431    * @param seq
432    * SequenceI
433    */
 
434  9 toggle public SeqCigar(SequenceI seq)
435    {
436  9 super();
437  9 if (seq == null)
438    {
439  0 throw new Error(MessageManager
440    .getString("error.implementation_error_for_new_cigar"));
441    }
442  9 _setSeq(seq, false, 0, 0);
443    // there is still work to do
444  9 addSequenceOps(this, seq, 0, seq.getLength() - 1, false);
445    }
446   
447    /**
448    * Create Cigar from a range of gaps and residues on a sequence object
449    *
450    * @param seq
451    * SequenceI
452    * @param start
453    * int - first column in range
454    * @param end
455    * int - last column in range
456    */
 
457  266 toggle public SeqCigar(SequenceI seq, int start, int end)
458    {
459  266 super();
460  266 if (seq == null)
461    {
462  0 throw new Error(MessageManager
463    .getString("error.implementation_error_for_new_cigar"));
464    }
465  266 _setSeq(seq, false, start, end + 1);
466    // there is still work to do
467  266 addSequenceOps(this, seq, start, end, false);
468    }
469   
470    /**
471    * Create a cigar object from a cigar string like '[<I|D|M><range>]+' Will
472    * fail if the given seq already contains gaps (JBPNote: future implementation
473    * will fix)
474    *
475    * @param seq
476    * SequenceI object resolvable to a dataset sequence
477    * @param cigarString
478    * String
479    * @return Cigar
480    */
 
481  1 toggle public static SeqCigar parseCigar(SequenceI seq, String cigarString)
482    throws Exception
483    {
484  1 Object[] opsandrange = parseCigarString(cigarString);
485  1 return new SeqCigar(seq, (char[]) opsandrange[0],
486    (int[]) opsandrange[1]);
487    }
488   
489    /**
490    * create an alignment from the given array of cigar sequences and gap
491    * character, and marking the given segments as visible in the given
492    * hiddenColumns.
493    *
494    * @param alseqs
495    * @param gapCharacter
496    * @param hidden
497    * - hiddenColumns where hidden regions are marked
498    * @param segments
499    * - visible regions of alignment
500    * @return SequenceI[]
501    */
 
502  26 toggle public static SequenceI[] createAlignmentSequences(SeqCigar[] alseqs,
503    char gapCharacter, HiddenColumns hidden, int[] segments)
504    {
505  26 SequenceI[] seqs = new SequenceI[alseqs.length];
506  26 StringBuffer[] g_seqs = new StringBuffer[alseqs.length];
507  27 String[] alseqs_string = new String[alseqs.length];
508  27 Object[] gs_regions = new Object[alseqs.length];
509  98 for (int i = 0; i < alseqs.length; i++)
510    {
511  71 alseqs_string[i] = alseqs[i].getRefSeq()
512    .getSequenceAsString(alseqs[i].start, alseqs[i].end);
513  72 gs_regions[i] = alseqs[i].getSequenceAndDeletions(alseqs_string[i],
514    gapCharacter); // gapped sequence, {start, start col, end.
515    // endcol}, hidden regions {{start, end, col}})
516  72 if (gs_regions[i] == null)
517    {
518  0 throw new Error(MessageManager.formatMessage(
519    "error.implementation_error_cigar_seq_no_operations",
520    new String[]
521    { Integer.valueOf(i).toString() }));
522    }
523  72 g_seqs[i] = new StringBuffer((String) ((Object[]) gs_regions[i])[0]); // the
524    // visible
525    // gapped
526    // sequence
527    }
528    // Now account for insertions. (well - deletions)
529    // this is complicated because we must keep track of shifted positions in
530    // each sequence
531  27 ShiftList shifts = new ShiftList();
532  99 for (int i = 0; i < alseqs.length; i++)
533    {
534  72 Object[] gs_region = ((Object[]) ((Object[]) gs_regions[i])[2]);
535  72 if (gs_region != null)
536   
537    {
538  2 for (int hr = 0; hr < gs_region.length; hr++)
539    {
540  1 int[] region = (int[]) gs_region[hr];
541  1 char[] insert = new char[region[1] - region[0] + 1];
542  7 for (int s = 0; s < insert.length; s++)
543    {
544  6 insert[s] = gapCharacter;
545    }
546  1 int inspos = shifts.shift(region[2]); // resolve insertion position in
547    // current alignment frame of
548    // reference
549  4 for (int s = 0; s < alseqs.length; s++)
550    {
551  3 if (s != i)
552    {
553  2 if (g_seqs[s].length() <= inspos)
554    {
555    // prefix insertion with more gaps.
556  0 for (int l = inspos - g_seqs[s].length(); l > 0; l--)
557    {
558  0 g_seqs[s].append(gapCharacter); // to debug - use a diffferent
559    // gap character here
560    }
561    }
562  2 g_seqs[s].insert(inspos, insert);
563    }
564    else
565    {
566  1 g_seqs[s].insert(inspos,
567    alseqs_string[i].substring(region[0], region[1] + 1));
568    }
569    }
570  1 shifts.addShift(region[2], insert.length); // update shift in
571    // alignment frame of
572    // reference
573  1 if (segments == null)
574    {
575    // add a hidden column for this deletion
576  1 hidden.hideColumns(inspos, inspos + insert.length - 1);
577    }
578    }
579    }
580    }
581  99 for (int i = 0; i < alseqs.length; i++)
582    {
583  72 int[] bounds = ((int[]) ((Object[]) gs_regions[i])[1]);
584  72 SequenceI ref = alseqs[i].getRefSeq();
585  72 seqs[i] = new Sequence(ref.getName(), g_seqs[i].toString(),
586    ref.getStart() + alseqs[i].start + bounds[0],
587    ref.getStart() + alseqs[i].start
588  72 + (bounds[2] == 0 ? -1 : bounds[2]));
589  72 seqs[i].setDatasetSequence(ref);
590  72 seqs[i].setDescription(ref.getDescription());
591    }
592  27 if (segments != null)
593    {
594  6 for (int i = 0; i < segments.length; i += 3)
595    {
596    // int start=shifts.shift(segments[i]-1)+1;
597    // int end=shifts.shift(segments[i]+segments[i+1]-1)-1;
598  3 hidden.hideColumns(segments[i + 1],
599    segments[i + 1] + segments[i + 2] - 1);
600    }
601    }
602  27 return seqs;
603    }
604   
605    /**
606    * references to entities that this sequence cigar is associated with.
607    */
608    private Hashtable selGroups = null;
609   
 
610  33 toggle public void setGroupMembership(Object group)
611    {
612  33 if (selGroups == null)
613    {
614  33 selGroups = new Hashtable();
615    }
616  33 selGroups.put(group, new int[0]);
617    }
618   
619    /**
620    * Test for and if present remove association to group.
621    *
622    * @param group
623    * @return true if group was associated and it was removed
624    */
 
625  0 toggle public boolean removeGroupMembership(Object group)
626    {
627  0 if (selGroups != null && selGroups.containsKey(group))
628    {
629  0 selGroups.remove(group);
630  0 return true;
631    }
632  0 return false;
633    }
634   
635    /**
636    * forget all associations for this sequence.
637    */
 
638  0 toggle public void clearMemberships()
639    {
640  0 if (selGroups != null)
641    {
642  0 selGroups.clear();
643    }
644  0 selGroups = null;
645    }
646   
647    /**
648    *
649    * @return null or array of all associated entities
650    */
 
651  0 toggle public Object[] getAllMemberships()
652    {
653  0 if (selGroups == null)
654    {
655  0 return null;
656    }
657  0 Object[] mmbs = new Object[selGroups.size()];
658  0 Enumeration en = selGroups.keys();
659  0 for (int i = 0; en.hasMoreElements(); i++)
660    {
661  0 mmbs[i] = en.nextElement();
662    }
663  0 return mmbs;
664    }
665   
666    /**
667    * Test for group membership
668    *
669    * @param sgr
670    * - a selection group or some other object that may be associated
671    * with seqCigar
672    * @return true if sgr is associated with this seqCigar
673    */
 
674  363 toggle public boolean isMemberOf(Object sgr)
675    {
676  363 return (selGroups != null) && selGroups.get(sgr) != null;
677    }
678    }