1. Project Clover database Wed Nov 13 2024 18:27:33 GMT
  2. Package jalview.datamodel

File MappedFeatures.java

 

Coverage histogram

../../img/srcFileCovDistChart10.png
0% of files have more coverage

Code metrics

40
69
5
1
317
157
30
0.43
13.8
5
6

Classes

Class
Line #
Actions
MappedFeatures 42 69 30
0.9473684494.7%
 

Contributing tests

This file is covered by 4 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.datamodel;
22   
23    import java.util.Locale;
24   
25    import java.util.HashSet;
26    import java.util.List;
27    import java.util.Set;
28   
29    import jalview.io.gff.Gff3Helper;
30    import jalview.schemes.ResidueProperties;
31    import jalview.util.MapList;
32    import jalview.util.MappingUtils;
33    import jalview.util.StringUtils;
34   
35    /**
36    * A data bean to hold a list of mapped sequence features (e.g. CDS features
37    * mapped from protein), and the mapping between the sequences. It also provides
38    * a method to derive peptide variants from codon variants.
39    *
40    * @author gmcarstairs
41    */
 
42    public class MappedFeatures
43    {
44    /*
45    * VEP CSQ:HGVSp (if present) is a short-cut to the protein variant consequence
46    */
47    private static final String HGV_SP = "HGVSp";
48   
49    private static final String CSQ = "CSQ";
50   
51    /*
52    * the sequence the mapped features are on
53    */
54    private final SequenceI featureSequence;
55   
56    /*
57    * the mapping between sequences;
58    * NB this could be in either sense (from or to featureSequence)
59    */
60    private final Mapping mapping;
61   
62    /*
63    * features on featureSequence that overlap the mapped positions
64    */
65    public final List<SequenceFeature> features;
66   
67    /*
68    * the residue position in the sequence mapped to
69    */
70    private final int toPosition;
71   
72    /*
73    * the residue at toPosition
74    */
75    private final char toResidue;
76   
77    /*
78    * if the mapping is 3:1 or 1:3 (peptide to CDS), this holds the
79    * mapped positions i.e. codon base positions in CDS; to
80    * support calculation of peptide variants from alleles
81    */
82    private final int[] codonPos;
83   
84    private final char[] baseCodon;
85   
86    /**
87    * Constructor
88    *
89    * @param theMapping
90    * sequence mapping (which may be either to, or from, the sequence
91    * holding the linked features)
92    * @param featureSeq
93    * the sequence hosting the virtual features
94    * @param pos
95    * the residue position in the sequence mapped to
96    * @param res
97    * the residue character at position pos
98    * @param theFeatures
99    * list of mapped features found in the 'featureSeq' sequence at the
100    * mapped position(s)
101    */
 
102  7 toggle public MappedFeatures(Mapping theMapping, SequenceI featureSeq, int pos,
103    char res, List<SequenceFeature> theFeatures)
104    {
105  7 mapping = theMapping;
106  7 featureSequence = featureSeq;
107  7 toPosition = pos;
108  7 toResidue = res;
109  7 features = theFeatures;
110   
111    /*
112    * determine codon positions and canonical codon
113    * for a peptide-to-CDS mapping
114    */
115  7 int[] codonIntervals = mapping.getMap().locateInFrom(toPosition,
116    toPosition);
117  7 int[] codonPositions = codonIntervals == null ? null
118    : MappingUtils.flattenRanges(codonIntervals);
119  7 if (codonPositions != null && codonPositions.length == 3)
120    {
121  6 codonPos = codonPositions;
122  6 baseCodon = new char[3];
123  6 int cdsStart = featureSequence.getStart();
124  6 baseCodon[0] = Character.toUpperCase(
125    featureSequence.getCharAt(codonPos[0] - cdsStart));
126  6 baseCodon[1] = Character.toUpperCase(
127    featureSequence.getCharAt(codonPos[1] - cdsStart));
128  6 baseCodon[2] = Character.toUpperCase(
129    featureSequence.getCharAt(codonPos[2] - cdsStart));
130    }
131    else
132    {
133  1 codonPos = null;
134  1 baseCodon = null;
135    }
136    }
137   
138    /**
139    * Computes and returns comma-delimited HGVS notation peptide variants derived
140    * from codon allele variants. If no variants are found, answers an empty
141    * string. The peptide variant is either simply read from the "CSQ:HGVSp"
142    * attribute if present, else computed based on the "alleles" attribute if
143    * present. If neither attribute is found, no variant (empty string) is
144    * returned.
145    *
146    * @param sf
147    * a sequence feature (which must be one of those held in this
148    * object)
149    * @return
150    */
 
151  14 toggle public String findProteinVariants(SequenceFeature sf)
152    {
153  14 if (!features.contains(sf) || baseCodon == null)
154    {
155  2 return "";
156    }
157   
158    /*
159    * VCF data may already contain the protein consequence
160    */
161  12 String hgvsp = sf.getValueAsString(CSQ, HGV_SP);
162  12 if (hgvsp != null)
163    {
164  3 int colonPos = hgvsp.lastIndexOf(':');
165  3 if (colonPos >= 0)
166    {
167  2 String var = hgvsp.substring(colonPos + 1);
168  2 if (var.contains("p.")) // sanity check
169    {
170  1 return var;
171    }
172    }
173    }
174   
175    /*
176    * otherwise, compute codon and peptide variant
177    */
178  11 int cdsPos = sf.getBegin();
179  11 if (cdsPos != sf.getEnd())
180    {
181    // not handling multi-locus variant features
182  1 return "";
183    }
184  10 if (cdsPos != codonPos[0] && cdsPos != codonPos[1]
185    && cdsPos != codonPos[2])
186    {
187    // e.g. feature on intron within spliced codon!
188  0 return "";
189    }
190   
191  10 String alls = (String) sf.getValue(Gff3Helper.ALLELES);
192  10 if (alls == null)
193    {
194  1 return "";
195    }
196   
197  9 String from3 = StringUtils.toSentenceCase(
198    ResidueProperties.aa2Triplet.get(String.valueOf(toResidue)));
199   
200    /*
201    * make a peptide variant for each SNP allele
202    * e.g. C,G,T gives variants G and T for base C
203    */
204  9 Set<String> variantPeptides = new HashSet<>();
205  9 String[] alleles = alls.toUpperCase(Locale.ROOT).split(",");
206  9 StringBuilder vars = new StringBuilder();
207   
208  9 for (String allele : alleles)
209    {
210  23 allele = allele.trim().toUpperCase(Locale.ROOT);
211  23 if (allele.length() > 1 || "-".equals(allele))
212    {
213  4 continue; // multi-locus variant
214    }
215  19 char[] variantCodon = new char[3];
216  19 variantCodon[0] = baseCodon[0];
217  19 variantCodon[1] = baseCodon[1];
218  19 variantCodon[2] = baseCodon[2];
219   
220    /*
221    * poke variant base into canonical codon;
222    * ignore first 'allele' (canonical base)
223    */
224  19 final int i = cdsPos == codonPos[0] ? 0
225  12 : (cdsPos == codonPos[1] ? 1 : 2);
226  19 variantCodon[i] = allele.toUpperCase(Locale.ROOT).charAt(0);
227  19 if (variantCodon[i] == baseCodon[i])
228    {
229  9 continue;
230    }
231  10 String codon = new String(variantCodon);
232  10 String peptide = ResidueProperties.codonTranslate(codon);
233  10 boolean synonymous = toResidue == peptide.charAt(0);
234  10 StringBuilder var = new StringBuilder();
235  10 if (synonymous)
236    {
237    /*
238    * synonymous variant notation e.g. c.1062C>A(p.=)
239    */
240  3 var.append("c.").append(String.valueOf(cdsPos))
241    .append(String.valueOf(baseCodon[i])).append(">")
242    .append(String.valueOf(variantCodon[i])).append("(p.=)");
243    }
244    else
245    {
246    /*
247    * missense variant notation e.g. p.Arg355Met
248    */
249  7 String to3 = ResidueProperties.STOP.equals(peptide) ? "Ter"
250    : StringUtils.toSentenceCase(
251    ResidueProperties.aa2Triplet.get(peptide));
252  7 var.append("p.").append(from3).append(String.valueOf(toPosition))
253    .append(to3);
254    }
255  10 if (!variantPeptides.contains(peptide)) // duplicate consequence
256    {
257  10 variantPeptides.add(peptide);
258  10 if (vars.length() > 0)
259    {
260  1 vars.append(",");
261    }
262  10 vars.append(var);
263    }
264    }
265   
266  9 return vars.toString();
267    }
268   
269    /**
270    * Answers the name of the linked sequence holding any mapped features
271    *
272    * @return
273    */
 
274  2 toggle public String getLinkedSequenceName()
275    {
276  2 return featureSequence == null ? null : featureSequence.getName();
277    }
278   
279    /**
280    * Answers the mapped ranges (as one or more [start, end] positions) which
281    * correspond to the given [begin, end] range of the linked sequence.
282    *
283    * <pre>
284    * Example: MappedFeatures with CDS features mapped to peptide
285    * CDS/200-220 gtc aac TGa acGt att AAC tta
286    * mapped to PEP/6-7 WN by mapping [206, 207, 210, 210, 215, 217] to [6, 7]
287    * getMappedPositions(206, 206) should return [6, 6]
288    * getMappedPositions(200, 214) should return [6, 6]
289    * getMappedPositions(210, 215) should return [6, 7]
290    * </pre>
291    *
292    * @param begin
293    * @param end
294    * @return
295    */
 
296  5 toggle public int[] getMappedPositions(int begin, int end)
297    {
298  5 MapList map = mapping.getMap();
299  5 return mapping.to == featureSequence ? map.getOverlapsInFrom(begin, end)
300    : map.getOverlapsInTo(begin, end);
301    }
302   
303    /**
304    * Answers true if the linked features are on coding sequence, false if on
305    * peptide
306    *
307    * @return
308    */
 
309  4 toggle public boolean isFromCds()
310    {
311  4 if (mapping.getMap().getFromRatio() == 3)
312    {
313  4 return mapping.to != featureSequence;
314    }
315  0 return mapping.to == featureSequence;
316    }
317    }