1. Project Clover database Wed Nov 13 2024 18:27:33 GMT
  2. Package jalview.datamodel

File CigarBase.java

 

Coverage histogram

../../img/srcFileCovDistChart9.png
12% of files have more coverage

Code metrics

106
220
13
1
661
422
99
0.45
16.92
13
7.62

Classes

Class
Line #
Actions
CigarBase 27 220 99
0.82005982%
 

Contributing tests

This file is covered by 19 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.datamodel;
22   
23    import jalview.util.MessageManager;
24   
25    import java.util.Vector;
26   
 
27    public abstract class CigarBase
28    {
29    /**
30    * Base class for compact idiosyncratic representation of gaps and aligned
31    * residues Regards to Tom Oldfield for his DynamicArray class. 17th July 2006
32    * Not thread safe.
33    */
 
34  173 toggle public CigarBase()
35    {
36    // nothing to be done (probably)
37    }
38   
39    protected int length = 0;
40   
41    protected int _inc_length = 10; // extension range for addition of new
42   
43    // operations
44   
45    protected char[] operation = null;
46   
47    protected int[] range = null;
48   
49    /**
50    * Range of Hidden residues in seq (translated as deleted in seq)
51    */
52    public static final char D = 'D';
53   
54    /**
55    * Range of insertions to seq
56    */
57    public static final char I = 'I';
58   
59    /**
60    * Range of aligned residues
61    */
62    public static final char M = 'M';
63   
64    static protected final char _case_shift = 'a' - 'A';
65   
66    /**
67    * Ugly function to get edited sequence string, start and end symbol positions
68    * and the deletion regions as an array of int pairs May return null for an
69    * empty cigar string. May return null for deletion ranges if there are none.
70    *
71    * @param reference
72    * - the symbol sequence to apply the cigar operations to (or null if
73    * no sequence)
74    * @param GapChar
75    * - the symbol to use for Insert operations
76    * @return Object[] { String, int[] {start, startcol, end, endcol}, int[][3]
77    * {start, end, col} or null} the gapped sequence, first and last
78    * residue index, and the deletion ranges on the reference sequence
79    */
 
80  208 toggle public Object[] getSequenceAndDeletions(String reference, char GapChar)
81    {
82  208 int rlength = 0;
83  208 int[][] deletions = new int[length][];
84  208 int[][] trunc_deletions = null;
85  208 StringBuffer sq = new StringBuffer();
86  208 int cursor = 0, alcursor = 0, start = 0, startpos = 0, end = 0,
87    endpos = 0, delcount = -1;
88  208 boolean consecutive_del = false;
89  208 if (length == 0)
90    {
91  0 return null;
92    }
93  208 if (reference != null)
94    {
95  208 rlength = reference.length();
96    }
97  208 boolean modstart = true;
98  1328 for (int i = 0; i < length; i++)
99    {
100  1120 switch (operation[i])
101    {
102  106 case D:
103  106 if (!consecutive_del)
104    {
105  106 deletions[++delcount] = new int[] { cursor, 0, alcursor };
106    }
107  106 cursor += range[i];
108  106 deletions[delcount][1] = cursor - 1;
109  106 consecutive_del = true;
110  106 break;
111  410 case I:
112  410 consecutive_del = false;
113  1846 for (int r = 0; r < range[i]; r++)
114    {
115  1436 sq.append(GapChar);
116  1436 alcursor++;
117    }
118  410 break;
119  604 case M:
120  604 consecutive_del = false;
121  604 if (modstart)
122    {
123  206 start = cursor;
124  206 startpos = alcursor;
125  206 modstart = false;
126    }
127  604 if (reference != null)
128    {
129  604 int sbend = cursor + range[i];
130  604 if (sbend > rlength)
131    {
132  0 sq.append(reference.substring(cursor, rlength));
133  0 while (sbend-- >= rlength)
134    {
135  0 sq.append(GapChar);
136    }
137    }
138    else
139    {
140  604 sq.append(reference.substring(cursor, sbend));
141    }
142    }
143  604 alcursor += range[i];
144  604 cursor += range[i];
145  604 end = cursor - 1;
146  604 endpos = alcursor;
147  604 break;
148  0 default:
149  0 throw new Error(MessageManager.formatMessage(
150    "error.unknown_seq_cigar_operation", new String[]
151    { new StringBuffer(operation[i]).toString() }));
152    }
153    }
154  208 if (++delcount > 0)
155    {
156  96 trunc_deletions = new int[delcount][];
157  96 System.arraycopy(deletions, 0, trunc_deletions, 0, delcount);
158    }
159  208 deletions = null;
160  208 return new Object[] { ((reference != null) ? sq.toString() : null),
161    new int[]
162    { start, startpos, end, endpos }, trunc_deletions };
163    }
164   
 
165  101 toggle protected void compact_operations()
166    {
167  101 int i = 1;
168  101 if (operation == null)
169    {
170  101 return;
171    }
172  0 char last = operation[0];
173  0 while (i < length)
174    {
175  0 if (last == operation[i])
176    {
177  0 range[i - 1] += range[i];
178  0 int r = length - i;
179  0 if (r > 0)
180    {
181  0 System.arraycopy(range, i + 1, range, i, r);
182  0 System.arraycopy(operation, i + 1, operation, i, r);
183    }
184  0 length--;
185    }
186    else
187    {
188  0 last = operation[i++];
189    }
190    }
191    }
192   
193    /**
194    * turn a cigar string into a series of operation range pairs
195    *
196    * @param cigarString
197    * String
198    * @return object[] {char[] operation, int[] range}
199    * @throws java.lang.Exception
200    * for improperly formated cigar strings or ones with unknown
201    * operations
202    */
 
203  1 toggle public static Object[] parseCigarString(String cigarString)
204    throws Exception
205    {
206  1 int ops = 0;
207  23 for (int i = 0, l = cigarString.length(); i < l; i++)
208    {
209  22 char c = cigarString.charAt(i);
210  22 if (c == M || c == (M - _case_shift) || c == I
211    || c == (I - _case_shift) || c == D || c == (D - _case_shift))
212    {
213  10 ops++;
214    }
215    }
216  1 char[] operation = new char[ops];
217  1 int[] range = new int[ops];
218  1 int op = 0;
219  1 int i = 0, l = cigarString.length();
220  11 while (i < l)
221    {
222  10 char c;
223  10 int j = i;
224  10 do
225    {
226  22 c = cigarString.charAt(j++);
227  22 } while (c >= '0' && c <= '9' && j < l);
228  10 if (j >= l && c >= '0' && c <= '9')
229    {
230  0 throw new Exception(MessageManager
231    .getString("exception.unterminated_cigar_string"));
232    }
233  10 try
234    {
235  10 String rangeint = cigarString.substring(i, j - 1);
236  10 range[op] = Integer.parseInt(rangeint);
237  10 i = j;
238    } catch (Exception e)
239    {
240  0 throw new Error(MessageManager
241    .getString("error.implementation_bug_parse_cigar_string"));
242    }
243  10 if (c >= 'a' && c <= 'z')
244    {
245  0 c -= _case_shift;
246    }
247  10 if ((c == M || c == I || c == D))
248    {
249  10 operation[op++] = c;
250    }
251    else
252    {
253  0 throw new Exception(MessageManager.formatMessage(
254    "exception.unexpected_operation_cigar_string_pos",
255    new String[]
256    { new StringBuffer(c).toString(),
257    Integer.valueOf(i).toString(), cigarString }));
258    }
259    }
260  1 return new Object[] { operation, range };
261    }
262   
263    /**
264    * add an operation to cigar string
265    *
266    * @param op
267    * char
268    * @param range
269    * int
270    */
 
271  1403 toggle public void addOperation(char op, int range)
272    {
273  1403 if (op >= 'a' && op <= 'z')
274    {
275  0 op -= _case_shift;
276    }
277  1403 if (op != M && op != D && op != I)
278    {
279  0 throw new Error(MessageManager.getString(
280    "error.implementation_error_invalid_operation_string"));
281    }
282  1403 if (range == 0)
283    {
284  0 return; // No Operation to add.
285    }
286  1403 if (range < 0)
287    {
288  0 throw new Error(
289    MessageManager.getString("error.invalid_range_string"));
290    }
291  1403 int lngth = 0;
292  1403 if (operation == null)
293    {
294  272 this.operation = new char[_inc_length];
295  272 this.range = new int[_inc_length];
296    }
297  1403 if (length + 1 == operation.length)
298    {
299  23 char[] ops = this.operation;
300  23 this.operation = new char[length + 1 + _inc_length];
301  23 System.arraycopy(ops, 0, this.operation, 0, length);
302  23 ops = null;
303  23 int[] rng = this.range;
304  23 this.range = new int[length + 1 + _inc_length];
305  23 System.arraycopy(rng, 0, this.range, 0, length);
306  23 rng = null;
307    }
308  1403 if ((length > 0) && (operation[length - 1] == op))
309    {
310  98 length--; // modify existing operation.
311    }
312    else
313    {
314  1305 this.range[length] = 0; // reset range
315    }
316  1403 this.operation[length] = op;
317  1403 this.range[length++] += range;
318    }
319   
320    /**
321    * semi-efficient insert an operation on the current cigar string set at
322    * column pos (from 1) NOTE: Insertion operations simply extend width of cigar
323    * result - affecting registration of alignment Deletion ops will shorten
324    * length of result - and affect registration of alignment Match ops will also
325    * affect length of result - affecting registration of alignment (ie
326    * "10M".insert(4,I,3)->"4M3I3M") - (replace?) (ie
327    * "10M".insert(4,D,3)->"4M3D3M") - (shortens alignment) (ie
328    * "5I5M".insert(4,I,3)->"8I5M") - real insertion (ie
329    * "5I5M".insert(4,D,3)->"4I2D3M") - shortens aligment - I's are removed, Ms
330    * changed to Ds (ie "10M".insert(4,M,3)->"13M") - lengthens - Is changed to
331    * M, Ds changed to M. (ie "5I5M".insert(4,M,3)->"4I8M") - effectively shifts
332    * sequence left by 1 residue and extends it by 3 (
333    * "10D5M".insert(-1,M,3)->"3M7D5M") ( "10D5M".insert(0,M,3)->"7D8M") (
334    * "10D5M".insert(1,M,3)->"10D8M") ( "1M10D5M".insert(0,M,3)->"1M10D8M") (
335    * "1M10D5M".insert(1,M,3)->"
336    *
337    * if pos is beyond width - I operations are added before the operation
338    *
339    * @param pos
340    * int -1, 0-length of visible region, or greater to append new ops
341    * (with insertions in between)
342    * @param op
343    * char
344    * @param range
345    * int public void addOperationAt(int pos, char op, int range) { int
346    * cursor = -1; // mark the position for the current operation being
347    * edited. int o = 0; boolean last_d = false; // previous op was a
348    * deletion. if (pos < -1) throw new Error("pos<-1 is not
349    * supported."); while (o<length) { if (operation[o] != D) { if (
350    * (cursor + this.range[o]) < pos) { cursor += this.range[o]; o++;
351    * last_d=false; } else { break; } } else { last_d=true; o++; } } if
352    * (o==length) { // must insert more operations before pos if
353    * (pos-cursor>0) addInsertion(pos-cursor); // then just add the new
354    * operation. Regardless of what it is. addOperation(op, range); }
355    * else { int diff = pos - cursor;
356    *
357    * int e_length = length-o; // new edit operation array length. //
358    * diff<0 - can only happen before first insertion or match. -
359    * affects op and all following // dif==0 - only when at first
360    * position of existing op - // diff>0 - must preserve some existing
361    * operations int[] e_range = new int[e_length];
362    * System.arraycopy(this.range, o, e_range, 0, e_length); char[] e_op
363    * = new char[e_length]; System.arraycopy(this.operation, o, e_op, 0,
364    * e_length); length = o; // can now use add_operation to extend
365    * list. int e_o=0; // current operation being edited. switch (op) {
366    * case M: switch (e_op[e_o]) { case M: if (last_d && diff <= 0) { //
367    * reduce D's, if possible if (range<=this.range[o-1]) { this.range[o
368    * - 1] -= range; } else { this.range[o-1]=0; } if
369    * (this.range[o-1]==0) o--; // lose this op. } e_range[e_o] +=
370    * range; // just add more matched residues break; case I: // change
371    * from insertion to match if (last_d && diff<=0) { // reduce D's, if
372    * possible if (range<=this.range[o-1]) { this.range[o - 1] -= range;
373    * } else { this.range[o-1]=0; } if (this.range[o-1]==0) o--; // lose
374    * this op. } e_range[e_o] break; default: throw new Inp }
375    *
376    * break; case I: break; case D: } break; default: throw new
377    * Error("Implementation Error: Unknown operation in addOperation!");
378    * } // finally, add remaining ops. while (e_o<e_length) {
379    * addOperation(e_op[e_o], e_range[e_o]); e_o++; } } }
380    */
381    /**
382    * Mark residues from start to end (inclusive) as deleted from the alignment,
383    * and removes any insertions.
384    *
385    * @param start
386    * int
387    * @param end
388    * int
389    * @return deleted int - number of symbols marked as deleted
390    */
 
391  101 toggle public int deleteRange(int start, int end)
392    {
393  101 int deleted = 0;
394  101 if (length == 0)
395    {
396    // nothing to do here
397  0 return deleted;
398    }
399  101 if (start < 0 || start > end)
400    {
401  0 throw new Error(MessageManager.getString(
402    "error.implementation_error_delete_range_out_of_bounds"));
403    }
404    // find beginning
405  101 int cursor = 0; // mark the position for the current operation being edited.
406  101 int rlength = 1 + end - start; // number of positions to delete
407  101 int oldlen = length;
408  101 int o = 0;
409  101 boolean editing = false;
410  101 char[] oldops = operation;
411  101 int[] oldrange = range;
412  101 length = 0;
413  101 operation = null;
414  101 range = null;
415  101 compact_operations();
416  655 while (o < oldlen && cursor <= end && rlength > 0)
417    {
418  554 if (oldops[o] == D)
419    {
420    // absorbed into new deleted region.
421  70 addDeleted(oldrange[o++]);
422  70 continue;
423    }
424   
425  484 int remain = oldrange[o]; // number of op characters left to edit
426  484 if (!editing)
427    {
428  430 if ((cursor + remain) <= start)
429    {
430  329 addOperation(oldops[o], oldrange[o]);
431  329 cursor += oldrange[o++];
432  329 continue; // next operation
433    }
434  101 editing = true;
435    // add operations before hidden region
436  101 if (start - cursor > 0)
437    {
438  87 addOperation(oldops[o], start - cursor);
439  87 remain -= start - cursor;
440    }
441    }
442    // start inserting new ops
443  155 if (o < oldlen && editing && rlength > 0 && remain > 0)
444    {
445  155 switch (oldops[o])
446    {
447  106 case M:
448  106 if (rlength > remain)
449    {
450  72 addDeleted(remain);
451  72 deleted += remain;
452    }
453    else
454    {
455  34 deleted += rlength;
456  34 addDeleted(rlength);
457  34 if (remain - rlength > 0)
458    {
459  14 this.addOperation(M, remain - rlength); // add remaining back.
460    }
461  34 rlength = 0;
462  34 remain = 0;
463    }
464  106 break;
465  49 case I:
466  49 if (remain - rlength > 0)
467    {
468    // only remove some gaps
469  13 addInsertion(remain - rlength);
470  13 rlength = 0;
471    }
472  49 break;
473  0 case D:
474  0 throw new Error(
475    MessageManager.getString("error.implementation_error")); // do
476    // nothing;
477  0 default:
478  0 throw new Error(MessageManager.formatMessage(
479    "error.implementation_error_unknown_operation",
480    new String[]
481    { new StringBuffer(oldops[o]).toString() }));
482    }
483  155 rlength -= remain;
484  155 remain = oldrange[++o]; // number of op characters left to edit
485    }
486    }
487    // add remaining
488  200 while (o < oldlen)
489    {
490  99 addOperation(oldops[o], oldrange[o++]);
491    }
492    // if (cursor<(start+1)) {
493    // ran out of ops - nothing to do here ?
494    // addInsertion(start-cursor);
495    // }
496  101 return deleted;
497    }
498   
499    /**
500    * Deleted regions mean that there will be discontinuous sequence numbering in
501    * the sequence returned by getSeq(char).
502    *
503    * @return true if there deletions
504    */
 
505  1 toggle public boolean hasDeletedRegions()
506    {
507  9 for (int i = 0; i < length; i++)
508    {
509  8 if (operation[i] == D)
510    {
511  0 return true;
512    }
513    }
514  1 return false;
515    }
516   
517    /**
518    * enumerate the ranges on seq that are marked as deleted in this cigar
519    *
520    * @return int[] { vis_start, sym_start, length }
521    */
 
522  25 toggle public int[] getDeletedRegions()
523    {
524  25 if (length == 0)
525    {
526  1 return null;
527    }
528  24 Vector dr = new Vector();
529  24 int cursor = 0, vcursor = 0;
530  68 for (int i = 0; i < length; i++)
531    {
532  44 switch (operation[i])
533    {
534  28 case M:
535  28 cursor += range[i];
536  28 break;
537  0 case I:
538  0 vcursor += range[i];
539  0 break;
540  16 case D:
541  16 dr.addElement(new int[] { vcursor, cursor, range[i] });
542  16 cursor += range[i];
543    }
544    }
545  24 if (dr.size() == 0)
546    {
547  14 return null;
548    }
549  10 int[] delregions = new int[dr.size() * 3];
550  26 for (int i = 0, l = dr.size(); i < l; i++)
551    {
552  16 int[] reg = (int[]) dr.elementAt(i);
553  16 delregions[i * 3] = reg[0];
554  16 delregions[i * 3 + 1] = reg[1];
555  16 delregions[i * 3 + 2] = reg[2];
556    }
557  10 return delregions;
558    }
559   
560    /**
561    * sum of ranges in cigar string
562    *
563    * @return int number of residues hidden, matched, or gaps inserted into
564    * sequence
565    */
 
566  1 toggle public int getFullWidth()
567    {
568  1 int w = 0;
569  1 if (range != null)
570    {
571  9 for (int i = 0; i < length; i++)
572    {
573  8 w += range[i];
574    }
575    }
576  1 return w;
577    }
578   
579    /**
580    * Visible length of aligned sequence
581    *
582    * @return int length of including gaps and less hidden regions
583    */
 
584  37 toggle public int getWidth()
585    {
586  37 int w = 0;
587  37 if (range != null)
588    {
589  165 for (int i = 0; i < length; i++)
590    {
591  129 if (operation[i] == M || operation[i] == I)
592    {
593  113 w += range[i];
594    }
595    }
596    }
597  37 return w;
598    }
599   
600    /**
601    * mark a range of inserted residues
602    *
603    * @param range
604    * int
605    */
 
606  13 toggle public void addInsertion(int range)
607    {
608  13 this.addOperation(I, range);
609    }
610   
611    /**
612    * mark the next range residues as hidden (not aligned) or deleted
613    *
614    * @param range
615    * int
616    */
 
617  176 toggle public void addDeleted(int range)
618    {
619  176 this.addOperation(D, range);
620    }
621   
622    /**
623    * Modifies operation list to delete columns from start to end (inclusive)
624    * editing will remove insertion operations, and convert matches to deletions
625    *
626    * @param start
627    * alignment column
628    * @param end
629    * alignment column
630    * @return boolean true if residues were marked as deleted. public boolean
631    * deleteRange(int start, int end) { boolean deleted = false; int op =
632    * 0, prevop = -1, firstm = -1, lastm = -1, postop = -1; int width =
633    * 0; // zero'th column if (length > 0) { // find operation bracketing
634    * start of the range do { if (operation[op] != D) { width +=
635    * range[prevop = op]; } op++; } while (op < length && width < start);
636    * } if (width < start) { // run off end - add more operations up to
637    * deletion. addInsertion(start - width); } else { // edit existing
638    * operations. op = prevop; width -= range[prevop]; int[] oldrange =
639    * range; char[] oldops = operation; range = new int[oldrange.length];
640    * operation = new char[oldops.length]; if (op < length) { do { if
641    * (operation[op] != D) { width += range[postop = op]; } op++; } while
642    * (op < length && width <= end); } } if (deleted == true) {
643    * addDeleted(end - start + 1); } return deleted; }
644    */
645    /**
646    * Return an ENSEMBL style cigar string where D may indicates excluded parts
647    * of seq
648    *
649    * @return String of form ([0-9]+[IMD])+
650    */
 
651  16 toggle public String getCigarstring()
652    {
653  16 StringBuffer cigarString = new StringBuffer();
654  84 for (int i = 0; i < length; i++)
655    {
656  68 cigarString.append("" + range[i]);
657  68 cigarString.append(operation[i]);
658    }
659  16 return cigarString.toString();
660    }
661    }