Clover icon

jalviewX

  1. Project Clover database Wed Oct 31 2018 15:13:58 GMT
  2. Package jalview.analysis

File Dna.java

 

Coverage histogram

../../img/srcFileCovDistChart7.png
28% of files have more coverage

Code metrics

156
375
16
1
1,000
684
141
0.38
23.44
16
8.81

Classes

Class Line # Actions
Dna 50 375 141 199
0.6361974563.6%
 

Contributing tests

This file is covered by 15 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import jalview.api.AlignViewportI;
24    import jalview.datamodel.AlignedCodon;
25    import jalview.datamodel.AlignedCodonFrame;
26    import jalview.datamodel.Alignment;
27    import jalview.datamodel.AlignmentAnnotation;
28    import jalview.datamodel.AlignmentI;
29    import jalview.datamodel.Annotation;
30    import jalview.datamodel.DBRefEntry;
31    import jalview.datamodel.DBRefSource;
32    import jalview.datamodel.FeatureProperties;
33    import jalview.datamodel.GraphLine;
34    import jalview.datamodel.Mapping;
35    import jalview.datamodel.Sequence;
36    import jalview.datamodel.SequenceFeature;
37    import jalview.datamodel.SequenceI;
38    import jalview.schemes.ResidueProperties;
39    import jalview.util.Comparison;
40    import jalview.util.DBRefUtils;
41    import jalview.util.MapList;
42    import jalview.util.ShiftList;
43   
44    import java.util.ArrayList;
45    import java.util.Arrays;
46    import java.util.Comparator;
47    import java.util.Iterator;
48    import java.util.List;
49   
 
50    public class Dna
51    {
52    private static final String STOP_ASTERIX = "*";
53   
54    private static final Comparator<AlignedCodon> comparator = new CodonComparator();
55   
56    /*
57    * 'final' variables describe the inputs to the translation, which should not
58    * be modified.
59    */
60    private final List<SequenceI> selection;
61   
62    private final String[] seqstring;
63   
64    private final Iterator<int[]> contigs;
65   
66    private final char gapChar;
67   
68    private final AlignmentAnnotation[] annotations;
69   
70    private final int dnaWidth;
71   
72    private final AlignmentI dataset;
73   
74    private ShiftList vismapping;
75   
76    private int[] startcontigs;
77   
78    /*
79    * Working variables for the translation.
80    *
81    * The width of the translation-in-progress protein alignment.
82    */
83    private int aaWidth = 0;
84   
85    /*
86    * This array will be built up so that position i holds the codon positions
87    * e.g. [7, 9, 10] that match column i (base 0) in the aligned translation.
88    * Note this implies a contract that if two codons do not align exactly, their
89    * translated products must occupy different column positions.
90    */
91    private AlignedCodon[] alignedCodons;
92   
93    /**
94    * Constructor given a viewport and the visible contigs.
95    *
96    * @param viewport
97    * @param visibleContigs
98    */
 
99  23 toggle public Dna(AlignViewportI viewport, Iterator<int[]> visibleContigs)
100    {
101  23 this.selection = Arrays.asList(viewport.getSequenceSelection());
102  23 this.seqstring = viewport.getViewAsString(true);
103  23 this.contigs = visibleContigs;
104  23 this.gapChar = viewport.getGapCharacter();
105  23 this.annotations = viewport.getAlignment().getAlignmentAnnotation();
106  23 this.dnaWidth = viewport.getAlignment().getWidth();
107  23 this.dataset = viewport.getAlignment().getDataset();
108  23 initContigs();
109    }
110   
111    /**
112    * Initialise contigs used as starting point for translateCodingRegion
113    */
 
114  23 toggle private void initContigs()
115    {
116  23 vismapping = new ShiftList(); // map from viscontigs to seqstring
117    // intervals
118   
119  23 int npos = 0;
120  23 int[] lastregion = null;
121  23 ArrayList<Integer> tempcontigs = new ArrayList<>();
122  48 while (contigs.hasNext())
123    {
124  25 int[] region = contigs.next();
125  25 if (lastregion == null)
126    {
127  23 vismapping.addShift(npos, region[0]);
128    }
129    else
130    {
131    // hidden region
132  2 vismapping.addShift(npos, region[0] - lastregion[1] + 1);
133    }
134  25 lastregion = region;
135  25 tempcontigs.add(region[0]);
136  25 tempcontigs.add(region[1]);
137    }
138   
139  23 startcontigs = new int[tempcontigs.size()];
140  23 int i = 0;
141  23 for (Integer val : tempcontigs)
142    {
143  50 startcontigs[i] = val;
144  50 i++;
145    }
146  23 tempcontigs = null;
147    }
148   
149    /**
150    * Test whether codon positions cdp1 should align before, with, or after cdp2.
151    * Returns zero if all positions match (or either argument is null). Returns
152    * -1 if any position in the first codon precedes the corresponding position
153    * in the second codon. Else returns +1 (some position in the second codon
154    * precedes the corresponding position in the first).
155    *
156    * Note this is not necessarily symmetric, for example:
157    * <ul>
158    * <li>compareCodonPos([2,5,6], [3,4,5]) returns -1</li>
159    * <li>compareCodonPos([3,4,5], [2,5,6]) also returns -1</li>
160    * </ul>
161    *
162    * @param ac1
163    * @param ac2
164    * @return
165    */
 
166  3341 toggle public static final int compareCodonPos(AlignedCodon ac1, AlignedCodon ac2)
167    {
168  3341 return comparator.compare(ac1, ac2);
169    // return jalview_2_8_2compare(ac1, ac2);
170    }
171   
172    /**
173    * Codon comparison up to Jalview 2.8.2. This rule is sequence order dependent
174    * - see http://issues.jalview.org/browse/JAL-1635
175    *
176    * @param ac1
177    * @param ac2
178    * @return
179    */
 
180  0 toggle private static int jalview_2_8_2compare(AlignedCodon ac1,
181    AlignedCodon ac2)
182    {
183  0 if (ac1 == null || ac2 == null || (ac1.equals(ac2)))
184    {
185  0 return 0;
186    }
187  0 if (ac1.pos1 < ac2.pos1 || ac1.pos2 < ac2.pos2 || ac1.pos3 < ac2.pos3)
188    {
189    // one base in cdp1 precedes the corresponding base in the other codon
190  0 return -1;
191    }
192    // one base in cdp1 appears after the corresponding base in the other codon.
193  0 return 1;
194    }
195   
196    /**
197    *
198    * @return
199    */
 
200  22 toggle public AlignmentI translateCdna()
201    {
202  22 AlignedCodonFrame acf = new AlignedCodonFrame();
203   
204  22 alignedCodons = new AlignedCodon[dnaWidth];
205   
206  22 int s;
207  22 int sSize = selection.size();
208  22 List<SequenceI> pepseqs = new ArrayList<>();
209  238 for (s = 0; s < sSize; s++)
210    {
211  216 SequenceI newseq = translateCodingRegion(selection.get(s),
212    seqstring[s], acf, pepseqs);
213   
214  216 if (newseq != null)
215    {
216  216 pepseqs.add(newseq);
217  216 SequenceI ds = newseq;
218  216 if (dataset != null)
219    {
220  0 while (ds.getDatasetSequence() != null)
221    {
222  0 ds = ds.getDatasetSequence();
223    }
224  0 dataset.addSequence(ds);
225    }
226    }
227    }
228   
229  22 SequenceI[] newseqs = pepseqs.toArray(new SequenceI[pepseqs.size()]);
230  22 AlignmentI al = new Alignment(newseqs);
231    // ensure we look aligned.
232  22 al.padGaps();
233    // link the protein translation to the DNA dataset
234  22 al.setDataset(dataset);
235  22 translateAlignedAnnotations(al, acf);
236  22 al.addCodonFrame(acf);
237  22 return al;
238    }
239   
240    /**
241    * fake the collection of DbRefs with associated exon mappings to identify if
242    * a translation would generate distinct product in the currently selected
243    * region.
244    *
245    * @param selection
246    * @param viscontigs
247    * @return
248    */
 
249  0 toggle public static boolean canTranslate(SequenceI[] selection,
250    int viscontigs[])
251    {
252  0 for (int gd = 0; gd < selection.length; gd++)
253    {
254  0 SequenceI dna = selection[gd];
255  0 DBRefEntry[] dnarefs = DBRefUtils.selectRefs(dna.getDBRefs(),
256    jalview.datamodel.DBRefSource.DNACODINGDBS);
257  0 if (dnarefs != null)
258    {
259    // intersect with pep
260  0 List<DBRefEntry> mappedrefs = new ArrayList<>();
261  0 DBRefEntry[] refs = dna.getDBRefs();
262  0 for (int d = 0; d < refs.length; d++)
263    {
264  0 if (refs[d].getMap() != null && refs[d].getMap().getMap() != null
265    && refs[d].getMap().getMap().getFromRatio() == 3
266    && refs[d].getMap().getMap().getToRatio() == 1)
267    {
268  0 mappedrefs.add(refs[d]); // add translated protein maps
269    }
270    }
271  0 dnarefs = mappedrefs.toArray(new DBRefEntry[mappedrefs.size()]);
272  0 for (int d = 0; d < dnarefs.length; d++)
273    {
274  0 Mapping mp = dnarefs[d].getMap();
275  0 if (mp != null)
276    {
277  0 for (int vc = 0; vc < viscontigs.length; vc += 2)
278    {
279  0 int[] mpr = mp.locateMappedRange(viscontigs[vc],
280    viscontigs[vc + 1]);
281  0 if (mpr != null)
282    {
283  0 return true;
284    }
285    }
286    }
287    }
288    }
289    }
290  0 return false;
291    }
292   
293    /**
294    * Translate nucleotide alignment annotations onto translated amino acid
295    * alignment using codon mapping codons
296    *
297    * @param al
298    * the translated protein alignment
299    */
 
300  22 toggle protected void translateAlignedAnnotations(AlignmentI al,
301    AlignedCodonFrame acf)
302    {
303    // Can only do this for columns with consecutive codons, or where
304    // annotation is sequence associated.
305   
306  22 if (annotations != null)
307    {
308  22 for (AlignmentAnnotation annotation : annotations)
309    {
310    /*
311    * Skip hidden or autogenerated annotation. Also (for now), RNA
312    * secondary structure annotation. If we want to show this against
313    * protein we need a smarter way to 'translate' without generating
314    * invalid (unbalanced) structure annotation.
315    */
316  316 if (annotation.autoCalculated || !annotation.visible
317    || annotation.isRNA())
318    {
319  316 continue;
320    }
321   
322  0 int aSize = aaWidth;
323  0 Annotation[] anots = (annotation.annotations == null) ? null
324    : new Annotation[aSize];
325  0 if (anots != null)
326    {
327  0 for (int a = 0; a < aSize; a++)
328    {
329    // process through codon map.
330  0 if (a < alignedCodons.length && alignedCodons[a] != null
331    && alignedCodons[a].pos1 == (alignedCodons[a].pos3 - 2))
332    {
333  0 anots[a] = getCodonAnnotation(alignedCodons[a],
334    annotation.annotations);
335    }
336    }
337    }
338   
339  0 AlignmentAnnotation aa = new AlignmentAnnotation(annotation.label,
340    annotation.description, anots);
341  0 aa.graph = annotation.graph;
342  0 aa.graphGroup = annotation.graphGroup;
343  0 aa.graphHeight = annotation.graphHeight;
344  0 if (annotation.getThreshold() != null)
345    {
346  0 aa.setThreshold(new GraphLine(annotation.getThreshold()));
347    }
348  0 if (annotation.hasScore)
349    {
350  0 aa.setScore(annotation.getScore());
351    }
352   
353  0 final SequenceI seqRef = annotation.sequenceRef;
354  0 if (seqRef != null)
355    {
356  0 SequenceI aaSeq = acf.getAaForDnaSeq(seqRef);
357  0 if (aaSeq != null)
358    {
359    // aa.compactAnnotationArray(); // throw away alignment annotation
360    // positioning
361  0 aa.setSequenceRef(aaSeq);
362    // rebuild mapping
363  0 aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true);
364  0 aa.adjustForAlignment();
365  0 aaSeq.addAlignmentAnnotation(aa);
366    }
367    }
368  0 al.addAnnotation(aa);
369    }
370    }
371    }
372   
 
373  0 toggle private static Annotation getCodonAnnotation(AlignedCodon is,
374    Annotation[] annotations)
375    {
376    // Have a look at all the codon positions for annotation and put the first
377    // one found into the translated annotation pos.
378  0 int contrib = 0;
379  0 Annotation annot = null;
380  0 for (int p = 1; p <= 3; p++)
381    {
382  0 int dnaCol = is.getBaseColumn(p);
383  0 if (annotations[dnaCol] != null)
384    {
385  0 if (annot == null)
386    {
387  0 annot = new Annotation(annotations[dnaCol]);
388  0 contrib = 1;
389    }
390    else
391    {
392    // merge with last
393  0 Annotation cpy = new Annotation(annotations[dnaCol]);
394  0 if (annot.colour == null)
395    {
396  0 annot.colour = cpy.colour;
397    }
398  0 if (annot.description == null || annot.description.length() == 0)
399    {
400  0 annot.description = cpy.description;
401    }
402  0 if (annot.displayCharacter == null)
403    {
404  0 annot.displayCharacter = cpy.displayCharacter;
405    }
406  0 if (annot.secondaryStructure == 0)
407    {
408  0 annot.secondaryStructure = cpy.secondaryStructure;
409    }
410  0 annot.value += cpy.value;
411  0 contrib++;
412    }
413    }
414    }
415  0 if (contrib > 1)
416    {
417  0 annot.value /= contrib;
418    }
419  0 return annot;
420    }
421   
422    /**
423    * Translate a na sequence
424    *
425    * @param selection
426    * sequence displayed under viscontigs visible columns
427    * @param seqstring
428    * ORF read in some global alignment reference frame
429    * @param acf
430    * Definition of global ORF alignment reference frame
431    * @param proteinSeqs
432    * @return sequence ready to be added to alignment.
433    */
 
434  216 toggle protected SequenceI translateCodingRegion(SequenceI selection,
435    String seqstring, AlignedCodonFrame acf,
436    List<SequenceI> proteinSeqs)
437    {
438  216 List<int[]> skip = new ArrayList<>();
439  216 int[] skipint = null;
440   
441  216 int npos = 0;
442  216 int vc = 0;
443   
444  216 int[] scontigs = new int[startcontigs.length];
445  216 System.arraycopy(startcontigs, 0, scontigs, 0, startcontigs.length);
446   
447    // allocate a roughly sized buffer for the protein sequence
448  216 StringBuilder protein = new StringBuilder(seqstring.length() / 2);
449  216 String seq = seqstring.replace('U', 'T').replace('u', 'T');
450  216 char codon[] = new char[3];
451  216 int cdp[] = new int[3];
452  216 int rf = 0;
453  216 int lastnpos = 0;
454  216 int nend;
455  216 int aspos = 0;
456  216 int resSize = 0;
457  6256 for (npos = 0, nend = seq.length(); npos < nend; npos++)
458    {
459  6040 if (!Comparison.isGap(seq.charAt(npos)))
460    {
461  6010 cdp[rf] = npos; // store position
462  6010 codon[rf++] = seq.charAt(npos); // store base
463    }
464  6040 if (rf == 3)
465    {
466    /*
467    * Filled up a reading frame...
468    */
469  1882 AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1],
470    cdp[2]);
471  1882 String aa = ResidueProperties.codonTranslate(new String(codon));
472  1882 rf = 0;
473  1882 final String gapString = String.valueOf(gapChar);
474  1882 if (aa == null)
475    {
476  1 aa = gapString;
477  1 if (skipint == null)
478    {
479  1 skipint = new int[] { alignedCodon.pos1,
480    alignedCodon.pos3 /*
481    * cdp[0],
482    * cdp[2]
483    */ };
484    }
485  1 skipint[1] = alignedCodon.pos3; // cdp[2];
486    }
487    else
488    {
489  1881 if (skipint != null)
490    {
491    // edit scontigs
492  1 skipint[0] = vismapping.shift(skipint[0]);
493  1 skipint[1] = vismapping.shift(skipint[1]);
494  2 for (vc = 0; vc < scontigs.length;)
495    {
496  1 if (scontigs[vc + 1] < skipint[0])
497    {
498    // before skipint starts
499  0 vc += 2;
500  0 continue;
501    }
502  1 if (scontigs[vc] > skipint[1])
503    {
504    // finished editing so
505  0 break;
506    }
507    // Edit the contig list to include the skipped region which did
508    // not translate
509  1 int[] t;
510    // from : s1 e1 s2 e2 s3 e3
511    // to s: s1 e1 s2 k0 k1 e2 s3 e3
512    // list increases by one unless one boundary (s2==k0 or e2==k1)
513    // matches, and decreases by one if skipint intersects whole
514    // visible contig
515  1 if (scontigs[vc] <= skipint[0])
516    {
517  1 if (skipint[0] == scontigs[vc])
518    {
519    // skipint at start of contig
520    // shift the start of this contig
521  0 if (scontigs[vc + 1] > skipint[1])
522    {
523  0 scontigs[vc] = skipint[1];
524  0 vc += 2;
525    }
526    else
527    {
528  0 if (scontigs[vc + 1] == skipint[1])
529    {
530    // remove the contig
531  0 t = new int[scontigs.length - 2];
532  0 if (vc > 0)
533    {
534  0 System.arraycopy(scontigs, 0, t, 0, vc - 1);
535    }
536  0 if (vc + 2 < t.length)
537    {
538  0 System.arraycopy(scontigs, vc + 2, t, vc,
539    t.length - vc + 2);
540    }
541  0 scontigs = t;
542    }
543    else
544    {
545    // truncate contig to before the skipint region
546  0 scontigs[vc + 1] = skipint[0] - 1;
547  0 vc += 2;
548    }
549    }
550    }
551    else
552    {
553    // scontig starts before start of skipint
554  1 if (scontigs[vc + 1] < skipint[1])
555    {
556    // skipint truncates end of scontig
557  0 scontigs[vc + 1] = skipint[0] - 1;
558  0 vc += 2;
559    }
560    else
561    {
562    // divide region to new contigs
563  1 t = new int[scontigs.length + 2];
564  1 System.arraycopy(scontigs, 0, t, 0, vc + 1);
565  1 t[vc + 1] = skipint[0];
566  1 t[vc + 2] = skipint[1];
567  1 System.arraycopy(scontigs, vc + 1, t, vc + 3,
568    scontigs.length - (vc + 1));
569  1 scontigs = t;
570  1 vc += 4;
571    }
572    }
573    }
574    }
575  1 skip.add(skipint);
576  1 skipint = null;
577    }
578  1881 if (aa.equals(ResidueProperties.STOP))
579    {
580  55 aa = STOP_ASTERIX;
581    }
582  1881 resSize++;
583    }
584  1882 boolean findpos = true;
585  5138 while (findpos)
586    {
587    /*
588    * Compare this codon's base positions with those currently aligned to
589    * this column in the translation.
590    */
591  3256 final int compareCodonPos = compareCodonPos(alignedCodon,
592    alignedCodons[aspos]);
593  3256 switch (compareCodonPos)
594    {
595  178 case -1:
596   
597    /*
598    * This codon should precede the mapped positions - need to insert a
599    * gap in all prior sequences.
600    */
601  178 insertAAGap(aspos, proteinSeqs);
602  178 findpos = false;
603  178 break;
604   
605  1374 case +1:
606   
607    /*
608    * This codon belongs after the aligned codons at aspos. Prefix it
609    * with a gap and try the next position.
610    */
611  1374 aa = gapString + aa;
612  1374 aspos++;
613  1374 break;
614   
615  1704 case 0:
616   
617    /*
618    * Exact match - codon 'belongs' at this translated position.
619    */
620  1704 findpos = false;
621    }
622    }
623  1882 protein.append(aa);
624  1882 lastnpos = npos;
625  1882 if (alignedCodons[aspos] == null)
626    {
627    // mark this column as aligning to this aligned reading frame
628  460 alignedCodons[aspos] = alignedCodon;
629    }
630  1422 else if (!alignedCodons[aspos].equals(alignedCodon))
631    {
632  0 throw new IllegalStateException(
633    "Tried to coalign " + alignedCodons[aspos].toString()
634    + " with " + alignedCodon.toString());
635    }
636  1882 if (aspos >= aaWidth)
637    {
638    // update maximum alignment width
639  443 aaWidth = aspos;
640    }
641    // ready for next translated reading frame alignment position (if any)
642  1882 aspos++;
643    }
644    }
645  216 if (resSize > 0)
646    {
647  216 SequenceI newseq = new Sequence(selection.getName(),
648    protein.toString());
649  216 if (rf != 0)
650    {
651  188 final String errMsg = "trimming contigs for incomplete terminal codon.";
652  188 System.err.println(errMsg);
653    // map and trim contigs to ORF region
654  188 vc = scontigs.length - 1;
655  188 lastnpos = vismapping.shift(lastnpos); // place npos in context of
656    // whole dna alignment (rather
657    // than visible contigs)
658    // incomplete ORF could be broken over one or two visible contig
659    // intervals.
660  376 while (vc >= 0 && scontigs[vc] > lastnpos)
661    {
662  188 if (vc > 0 && scontigs[vc - 1] > lastnpos)
663    {
664  0 vc -= 2;
665    }
666    else
667    {
668    // correct last interval in list.
669  188 scontigs[vc] = lastnpos;
670    }
671    }
672   
673  188 if (vc > 0 && (vc + 1) < scontigs.length)
674    {
675    // truncate map list to just vc elements
676  0 int t[] = new int[vc + 1];
677  0 System.arraycopy(scontigs, 0, t, 0, vc + 1);
678  0 scontigs = t;
679    }
680  188 if (vc <= 0)
681    {
682  0 scontigs = null;
683    }
684    }
685  216 if (scontigs != null)
686    {
687  216 npos = 0;
688    // map scontigs to actual sequence positions on selection
689  411 for (vc = 0; vc < scontigs.length; vc += 2)
690    {
691  219 scontigs[vc] = selection.findPosition(scontigs[vc]); // not from 1!
692  219 scontigs[vc + 1] = selection.findPosition(scontigs[vc + 1]); // exclusive
693  219 if (scontigs[vc + 1] == selection.getEnd())
694    {
695  24 break;
696    }
697    }
698    // trim trailing empty intervals.
699  216 if ((vc + 2) < scontigs.length)
700    {
701  0 int t[] = new int[vc + 2];
702  0 System.arraycopy(scontigs, 0, t, 0, vc + 2);
703  0 scontigs = t;
704    }
705    /*
706    * delete intervals in scontigs which are not translated. 1. map skip
707    * into sequence position intervals 2. truncate existing ranges and add
708    * new ranges to exclude untranslated regions. if (skip.size()>0) {
709    * Vector narange = new Vector(); for (vc=0; vc<scontigs.length; vc++) {
710    * narange.addElement(new int[] {scontigs[vc]}); } int sint=0,iv[]; vc =
711    * 0; while (sint<skip.size()) { skipint = (int[]) skip.elementAt(sint);
712    * do { iv = (int[]) narange.elementAt(vc); if (iv[0]>=skipint[0] &&
713    * iv[0]<=skipint[1]) { if (iv[0]==skipint[0]) { // delete beginning of
714    * range } else { // truncate range and create new one if necessary iv =
715    * (int[]) narange.elementAt(vc+1); if (iv[0]<=skipint[1]) { // truncate
716    * range iv[0] = skipint[1]; } else { } } } else if (iv[0]<skipint[0]) {
717    * iv = (int[]) narange.elementAt(vc+1); } } while (iv[0]) } }
718    */
719  216 MapList map = new MapList(scontigs, new int[] { 1, resSize }, 3, 1);
720   
721  216 transferCodedFeatures(selection, newseq, map);
722   
723    /*
724    * Construct a dataset sequence for our new peptide.
725    */
726  216 SequenceI rseq = newseq.deriveSequence();
727   
728    /*
729    * Store a mapping (between the dataset sequences for the two
730    * sequences).
731    */
732    // SIDE-EFFECT: acf stores the aligned sequence reseq; to remove!
733  216 acf.addMap(selection, rseq, map);
734  216 return rseq;
735    }
736    }
737    // register the mapping somehow
738    //
739  0 return null;
740    }
741   
742    /**
743    * Insert a gap into the aligned proteins and the codon mapping array.
744    *
745    * @param pos
746    * @param proteinSeqs
747    * @return
748    */
 
749  178 toggle protected void insertAAGap(int pos, List<SequenceI> proteinSeqs)
750    {
751  178 aaWidth++;
752  178 for (SequenceI seq : proteinSeqs)
753    {
754  598 seq.insertCharAt(pos, gapChar);
755    }
756   
757  178 checkCodonFrameWidth();
758  178 if (pos < aaWidth)
759    {
760  178 aaWidth++;
761   
762    /*
763    * Shift from [pos] to the end one to the right, and null out [pos]
764    */
765  178 System.arraycopy(alignedCodons, pos, alignedCodons, pos + 1,
766    alignedCodons.length - pos - 1);
767  178 alignedCodons[pos] = null;
768    }
769    }
770   
771    /**
772    * Check the codons array can accommodate a single insertion, if not resize
773    * it.
774    */
 
775  178 toggle protected void checkCodonFrameWidth()
776    {
777  178 if (alignedCodons[alignedCodons.length - 1] != null)
778    {
779    /*
780    * arraycopy insertion would bump a filled slot off the end, so expand.
781    */
782  0 AlignedCodon[] c = new AlignedCodon[alignedCodons.length + 10];
783  0 System.arraycopy(alignedCodons, 0, c, 0, alignedCodons.length);
784  0 alignedCodons = c;
785    }
786    }
787   
788    /**
789    * Given a peptide newly translated from a dna sequence, copy over and set any
790    * features on the peptide from the DNA.
791    *
792    * @param dna
793    * @param pep
794    * @param map
795    */
 
796  216 toggle private static void transferCodedFeatures(SequenceI dna, SequenceI pep,
797    MapList map)
798    {
799  216 DBRefEntry[] dnarefs = DBRefUtils.selectRefs(dna.getDBRefs(),
800    DBRefSource.DNACODINGDBS);
801  216 if (dnarefs != null)
802    {
803    // intersect with pep
804  0 for (int d = 0; d < dnarefs.length; d++)
805    {
806  0 Mapping mp = dnarefs[d].getMap();
807  0 if (mp != null)
808    {
809    }
810    }
811    }
812  216 for (SequenceFeature sf : dna.getFeatures().getAllFeatures())
813    {
814  0 if (FeatureProperties.isCodingFeature(null, sf.getType()))
815    {
816    // if (map.intersectsFrom(sf[f].begin, sf[f].end))
817    {
818   
819    }
820    }
821    }
822    }
823   
824    /**
825    * Returns an alignment consisting of the reversed (and optionally
826    * complemented) sequences set in this object's constructor
827    *
828    * @param complement
829    * @return
830    */
 
831  1 toggle public AlignmentI reverseCdna(boolean complement)
832    {
833  1 int sSize = selection.size();
834  1 List<SequenceI> reversed = new ArrayList<>();
835  2 for (int s = 0; s < sSize; s++)
836    {
837  1 SequenceI newseq = reverseSequence(selection.get(s).getName(),
838    seqstring[s], complement);
839   
840  1 if (newseq != null)
841    {
842  1 reversed.add(newseq);
843    }
844    }
845   
846  1 SequenceI[] newseqs = reversed.toArray(new SequenceI[reversed.size()]);
847  1 AlignmentI al = new Alignment(newseqs);
848  1 ((Alignment) al).createDatasetAlignment();
849  1 return al;
850    }
851   
852    /**
853    * Returns a reversed, and optionally complemented, sequence. The new
854    * sequence's name is the original name with "|rev" or "|revcomp" appended.
855    * aAcCgGtT and DNA ambiguity codes are complemented, any other characters are
856    * left unchanged.
857    *
858    * @param seq
859    * @param complement
860    * @return
861    */
 
862  3 toggle public static SequenceI reverseSequence(String seqName, String sequence,
863    boolean complement)
864    {
865  3 String newName = seqName + "|rev" + (complement ? "comp" : "");
866  3 char[] originalSequence = sequence.toCharArray();
867  3 int length = originalSequence.length;
868  3 char[] reversedSequence = new char[length];
869  3 int bases = 0;
870  63 for (int i = 0; i < length; i++)
871    {
872  60 char c = complement ? getComplement(originalSequence[i])
873    : originalSequence[i];
874  60 reversedSequence[length - i - 1] = c;
875  60 if (!Comparison.isGap(c))
876    {
877  45 bases++;
878    }
879    }
880  3 SequenceI reversed = new Sequence(newName, reversedSequence, 1, bases);
881  3 return reversed;
882    }
883   
884    /**
885    * Answers the reverse complement of the input string
886    *
887    * @see #getComplement(char)
888    * @param s
889    * @return
890    */
 
891  28 toggle public static String reverseComplement(String s)
892    {
893  28 StringBuilder sb = new StringBuilder(s.length());
894  60 for (int i = s.length() - 1; i >= 0; i--)
895    {
896  32 sb.append(Dna.getComplement(s.charAt(i)));
897    }
898  28 return sb.toString();
899    }
900   
901    /**
902    * Returns dna complement (preserving case) for aAcCgGtTuU. Ambiguity codes
903    * are treated as on http://reverse-complement.com/. Anything else is left
904    * unchanged.
905    *
906    * @param c
907    * @return
908    */
 
909  98 toggle public static char getComplement(char c)
910    {
911  98 char result = c;
912  98 switch (c)
913    {
914  7 case '-':
915  0 case '.':
916  0 case ' ':
917  7 break;
918  2 case 'a':
919  2 result = 't';
920  2 break;
921  13 case 'A':
922  13 result = 'T';
923  13 break;
924  3 case 'c':
925  3 result = 'g';
926  3 break;
927  18 case 'C':
928  18 result = 'G';
929  18 break;
930  3 case 'g':
931  3 result = 'c';
932  3 break;
933  13 case 'G':
934  13 result = 'C';
935  13 break;
936  3 case 't':
937  3 result = 'a';
938  3 break;
939  6 case 'T':
940  6 result = 'A';
941  6 break;
942  1 case 'u':
943  1 result = 'a';
944  1 break;
945  2 case 'U':
946  2 result = 'A';
947  2 break;
948  2 case 'r':
949  2 result = 'y';
950  2 break;
951  1 case 'R':
952  1 result = 'Y';
953  1 break;
954  1 case 'y':
955  1 result = 'r';
956  1 break;
957  2 case 'Y':
958  2 result = 'R';
959  2 break;
960  2 case 'k':
961  2 result = 'm';
962  2 break;
963  1 case 'K':
964  1 result = 'M';
965  1 break;
966  1 case 'm':
967  1 result = 'k';
968  1 break;
969  2 case 'M':
970  2 result = 'K';
971  2 break;
972  2 case 'b':
973  2 result = 'v';
974  2 break;
975  1 case 'B':
976  1 result = 'V';
977  1 break;
978  1 case 'v':
979  1 result = 'b';
980  1 break;
981  2 case 'V':
982  2 result = 'B';
983  2 break;
984  2 case 'd':
985  2 result = 'h';
986  2 break;
987  1 case 'D':
988  1 result = 'H';
989  1 break;
990  1 case 'h':
991  1 result = 'd';
992  1 break;
993  2 case 'H':
994  2 result = 'D';
995  2 break;
996    }
997   
998  98 return result;
999    }
1000    }