Clover icon

Coverage Report

  1. Project Clover database Thu Aug 13 2020 12:04:21 BST
  2. Package jalview.datamodel

File MappedFeatures.java

 

Coverage histogram

../../img/srcFileCovDistChart10.png
0% of files have more coverage

Code metrics

40
69
5
1
315
156
30
0.43
13.8
5
6

Classes

Class Line # Actions
MappedFeatures 40 69 30
0.929824693%
 

Contributing tests

This file is covered by 4 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.datamodel;
22   
23    import java.util.HashSet;
24    import java.util.List;
25    import java.util.Set;
26   
27    import jalview.io.gff.Gff3Helper;
28    import jalview.schemes.ResidueProperties;
29    import jalview.util.MapList;
30    import jalview.util.MappingUtils;
31    import jalview.util.StringUtils;
32   
33    /**
34    * A data bean to hold a list of mapped sequence features (e.g. CDS features
35    * mapped from protein), and the mapping between the sequences. It also provides
36    * a method to derive peptide variants from codon variants.
37    *
38    * @author gmcarstairs
39    */
 
40    public class MappedFeatures
41    {
42    /*
43    * VEP CSQ:HGVSp (if present) is a short-cut to the protein variant consequence
44    */
45    private static final String HGV_SP = "HGVSp";
46   
47    private static final String CSQ = "CSQ";
48   
49    /*
50    * the sequence the mapped features are on
51    */
52    private final SequenceI featureSequence;
53   
54    /*
55    * the mapping between sequences;
56    * NB this could be in either sense (from or to featureSequence)
57    */
58    private final Mapping mapping;
59   
60    /*
61    * features on featureSequence that overlap the mapped positions
62    */
63    public final List<SequenceFeature> features;
64   
65    /*
66    * the residue position in the sequence mapped to
67    */
68    private final int toPosition;
69   
70    /*
71    * the residue at toPosition
72    */
73    private final char toResidue;
74   
75    /*
76    * if the mapping is 3:1 or 1:3 (peptide to CDS), this holds the
77    * mapped positions i.e. codon base positions in CDS; to
78    * support calculation of peptide variants from alleles
79    */
80    private final int[] codonPos;
81   
82    private final char[] baseCodon;
83   
84    /**
85    * Constructor
86    *
87    * @param theMapping
88    * sequence mapping (which may be either to, or from, the sequence
89    * holding the linked features)
90    * @param featureSeq
91    * the sequence hosting the virtual features
92    * @param pos
93    * the residue position in the sequence mapped to
94    * @param res
95    * the residue character at position pos
96    * @param theFeatures
97    * list of mapped features found in the 'featureSeq' sequence at the
98    * mapped position(s)
99    */
 
100  7 toggle public MappedFeatures(Mapping theMapping, SequenceI featureSeq, int pos,
101    char res, List<SequenceFeature> theFeatures)
102    {
103  7 mapping = theMapping;
104  7 featureSequence = featureSeq;
105  7 toPosition = pos;
106  7 toResidue = res;
107  7 features = theFeatures;
108   
109    /*
110    * determine codon positions and canonical codon
111    * for a peptide-to-CDS mapping
112    */
113  7 int[] codonIntervals = mapping.getMap().locateInFrom(toPosition, toPosition);
114  7 int[] codonPositions = codonIntervals == null ? null
115    : MappingUtils.flattenRanges(codonIntervals);
116  7 if (codonPositions != null && codonPositions.length == 3)
117    {
118  6 codonPos = codonPositions;
119  6 baseCodon = new char[3];
120  6 int cdsStart = featureSequence.getStart();
121  6 baseCodon[0] = Character
122    .toUpperCase(featureSequence.getCharAt(codonPos[0] - cdsStart));
123  6 baseCodon[1] = Character
124    .toUpperCase(featureSequence.getCharAt(codonPos[1] - cdsStart));
125  6 baseCodon[2] = Character
126    .toUpperCase(featureSequence.getCharAt(codonPos[2] - cdsStart));
127    }
128    else
129    {
130  1 codonPos = null;
131  1 baseCodon = null;
132    }
133    }
134   
135    /**
136    * Computes and returns comma-delimited HGVS notation peptide variants derived
137    * from codon allele variants. If no variants are found, answers an empty
138    * string. The peptide variant is either simply read from the "CSQ:HGVSp"
139    * attribute if present, else computed based on the "alleles" attribute if
140    * present. If neither attribute is found, no variant (empty string) is
141    * returned.
142    *
143    * @param sf
144    * a sequence feature (which must be one of those held in this
145    * object)
146    * @return
147    */
 
148  13 toggle public String findProteinVariants(SequenceFeature sf)
149    {
150  13 if (!features.contains(sf) || baseCodon == null)
151    {
152  2 return "";
153    }
154   
155    /*
156    * VCF data may already contain the protein consequence
157    */
158  11 String hgvsp = sf.getValueAsString(CSQ, HGV_SP);
159  11 if (hgvsp != null)
160    {
161  3 int colonPos = hgvsp.lastIndexOf(':');
162  3 if (colonPos >= 0)
163    {
164  2 String var = hgvsp.substring(colonPos + 1);
165  2 if (var.contains("p.")) // sanity check
166    {
167  1 return var;
168    }
169    }
170    }
171   
172    /*
173    * otherwise, compute codon and peptide variant
174    */
175  10 int cdsPos = sf.getBegin();
176  10 if (cdsPos != sf.getEnd())
177    {
178    // not handling multi-locus variant features
179  0 return "";
180    }
181  10 if (cdsPos != codonPos[0] && cdsPos != codonPos[1]
182    && cdsPos != codonPos[2])
183    {
184    // e.g. feature on intron within spliced codon!
185  0 return "";
186    }
187   
188  10 String alls = (String) sf.getValue(Gff3Helper.ALLELES);
189  10 if (alls == null)
190    {
191  1 return "";
192    }
193   
194  9 String from3 = StringUtils.toSentenceCase(
195    ResidueProperties.aa2Triplet.get(String.valueOf(toResidue)));
196   
197    /*
198    * make a peptide variant for each SNP allele
199    * e.g. C,G,T gives variants G and T for base C
200    */
201  9 Set<String> variantPeptides = new HashSet<>();
202  9 String[] alleles = alls.toUpperCase().split(",");
203  9 StringBuilder vars = new StringBuilder();
204   
205  9 for (String allele : alleles)
206    {
207  23 allele = allele.trim().toUpperCase();
208  23 if (allele.length() > 1 || "-".equals(allele))
209    {
210  4 continue; // multi-locus variant
211    }
212  19 char[] variantCodon = new char[3];
213  19 variantCodon[0] = baseCodon[0];
214  19 variantCodon[1] = baseCodon[1];
215  19 variantCodon[2] = baseCodon[2];
216   
217    /*
218    * poke variant base into canonical codon;
219    * ignore first 'allele' (canonical base)
220    */
221  19 final int i = cdsPos == codonPos[0] ? 0
222  12 : (cdsPos == codonPos[1] ? 1 : 2);
223  19 variantCodon[i] = allele.toUpperCase().charAt(0);
224  19 if (variantCodon[i] == baseCodon[i])
225    {
226  9 continue;
227    }
228  10 String codon = new String(variantCodon);
229  10 String peptide = ResidueProperties.codonTranslate(codon);
230  10 boolean synonymous = toResidue == peptide.charAt(0);
231  10 StringBuilder var = new StringBuilder();
232  10 if (synonymous)
233    {
234    /*
235    * synonymous variant notation e.g. c.1062C>A(p.=)
236    */
237  3 var.append("c.").append(String.valueOf(cdsPos))
238    .append(String.valueOf(baseCodon[i])).append(">")
239    .append(String.valueOf(variantCodon[i]))
240    .append("(p.=)");
241    }
242    else
243    {
244    /*
245    * missense variant notation e.g. p.Arg355Met
246    */
247  7 String to3 = ResidueProperties.STOP.equals(peptide) ? "Ter"
248    : StringUtils.toSentenceCase(
249    ResidueProperties.aa2Triplet.get(peptide));
250  7 var.append("p.").append(from3).append(String.valueOf(toPosition))
251    .append(to3);
252    }
253  10 if (!variantPeptides.contains(peptide)) // duplicate consequence
254    {
255  10 variantPeptides.add(peptide);
256  10 if (vars.length() > 0)
257    {
258  1 vars.append(",");
259    }
260  10 vars.append(var);
261    }
262    }
263   
264  9 return vars.toString();
265    }
266   
267    /**
268    * Answers the name of the linked sequence holding any mapped features
269    *
270    * @return
271    */
 
272  1 toggle public String getLinkedSequenceName()
273    {
274  1 return featureSequence == null ? null : featureSequence.getName();
275    }
276   
277    /**
278    * Answers the mapped ranges (as one or more [start, end] positions) which
279    * correspond to the given [begin, end] range of the linked sequence.
280    *
281    * <pre>
282    * Example: MappedFeatures with CDS features mapped to peptide
283    * CDS/200-220 gtc aac TGa acGt att AAC tta
284    * mapped to PEP/6-7 WN by mapping [206, 207, 210, 210, 215, 217] to [6, 7]
285    * getMappedPositions(206, 206) should return [6, 6]
286    * getMappedPositions(200, 214) should return [6, 6]
287    * getMappedPositions(210, 215) should return [6, 7]
288    * </pre>
289    *
290    * @param begin
291    * @param end
292    * @return
293    */
 
294  8 toggle public int[] getMappedPositions(int begin, int end)
295    {
296  8 MapList map = mapping.getMap();
297  8 return mapping.to == featureSequence ? map.locateInFrom(begin, end)
298    : map.locateInTo(begin, end);
299    }
300   
301    /**
302    * Answers true if the linked features are on coding sequence, false if on
303    * peptide
304    *
305    * @return
306    */
 
307  2 toggle public boolean isFromCds()
308    {
309  2 if (mapping.getMap().getFromRatio() == 3)
310    {
311  2 return mapping.to != featureSequence;
312    }
313  0 return mapping.to == featureSequence;
314    }
315    }