Clover icon

jalviewX

  1. Project Clover database Wed Oct 31 2018 15:13:58 GMT
  2. Package jalview.io.gff

File ExonerateHelper.java

 

Coverage histogram

../../../img/srcFileCovDistChart9.png
12% of files have more coverage

Code metrics

32
88
6
1
370
198
37
0.42
14.67
6
6.17

Classes

Class Line # Actions
ExonerateHelper 37 88 37 19
0.8492063 (84.9%)
 

Contributing tests

This file is covered by 14 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.io.gff;
22   
23    import jalview.datamodel.AlignedCodonFrame;
24    import jalview.datamodel.AlignmentI;
25    import jalview.datamodel.MappingType;
26    import jalview.datamodel.SequenceFeature;
27    import jalview.datamodel.SequenceI;
28    import jalview.util.MapList;
29   
30    import java.io.IOException;
31    import java.util.List;
32    import java.util.Map;
33   
34    /**
35    * A handler to parse GFF in the format generated by the exonerate tool
36    */
 
37    public class ExonerateHelper extends Gff2Helper
38    {
39    private static final String SIMILARITY = "similarity";
40   
41    private static final String GENOME2GENOME = "genome2genome";
42   
43    private static final String CDNA2GENOME = "cdna2genome";
44   
45    private static final String CODING2GENOME = "coding2genome";
46   
47    private static final String CODING2CODING = "coding2coding";
48   
49    private static final String PROTEIN2GENOME = "protein2genome";
50   
51    private static final String PROTEIN2DNA = "protein2dna";
52   
53    private static final String ALIGN = "Align";
54   
55    private static final String QUERY = "Query";
56   
57    private static final String TARGET = "Target";
58   
59    /**
60    * Process one GFF feature line (as modelled by SequenceFeature)
61    *
62    * @param seq
63    * the sequence with which this feature is associated
64    * @param gffColumns
65    * the sequence feature with ATTRIBUTES property containing any
66    * additional attributes
67    * @param align
68    * the alignment we are adding GFF to
69    * @param newseqs
70    * any new sequences referenced by the GFF
71    * @param relaxedIdMatching
72    * if true, match word tokens in sequence names
73    * @return true if the sequence feature should be added to the sequence, else
74    * false (i.e. it has been processed in another way e.g. to generate a
75    * mapping)
76    */
 
77  7 toggle @Override
78    public SequenceFeature processGff(SequenceI seq, String[] gffColumns,
79    AlignmentI align, List<SequenceI> newseqs,
80    boolean relaxedIdMatching)
81    {
82  7 String attr = gffColumns[ATTRIBUTES_COL];
83  7 Map<String, List<String>> set = parseNameValuePairs(attr);
84   
85  7 try
86    {
87  7 processGffSimilarity(set, seq, gffColumns, align, newseqs,
88    relaxedIdMatching);
89    } catch (IOException ivfe)
90    {
91  0 System.err.println(ivfe);
92    }
93   
94    /*
95    * return null to indicate we don't want to add a sequence feature for
96    * similarity (only process it to create mappings)
97    */
98  7 return null;
99    }
100   
101    /**
102    * Processes the 'Query' (or 'Target') and 'Align' properties associated with
103    * an exonerate GFF similarity feature; these properties define the mapping of
104    * the annotated range to a related sequence.
105    *
106    * @param set
107    * parsed GFF column 9 key/value(s)
108    * @param seq
109    * the sequence the GFF feature is on
110    * @param gff
111    * the GFF column data
112    * @param align
113    * the alignment the sequence belongs to, where any new mappings
114    * should be added
115    * @param newseqs
116    * a list of new 'virtual sequences' generated while parsing GFF
117    * @param relaxedIdMatching
118    * if true allow fuzzy search for a matching target sequence
119    * @throws IOException
120    */
 
121  11 toggle protected void processGffSimilarity(Map<String, List<String>> set,
122    SequenceI seq, String[] gff, AlignmentI align,
123    List<SequenceI> newseqs, boolean relaxedIdMatching)
124    throws IOException
125    {
126    /*
127    * exonerate may be run with
128    * --showquerygff - outputs 'features on the query' e.g. (protein2genome)
129    * Target <dnaseqid> ; Align proteinStartPos dnaStartPos proteinCount
130    * --showtargetgff - outputs 'features on the target' e.g. (protein2genome)
131    * Query <proteinseqid> ; Align dnaStartPos proteinStartPos nucleotideCount
132    * where the Align spec may repeat
133    */
134    // TODO handle coding2coding and similar as well
135  11 boolean featureIsOnTarget = true;
136  11 List<String> mapTo = set.get(QUERY);
137  11 if (mapTo == null)
138    {
139  3 mapTo = set.get(TARGET);
140  3 featureIsOnTarget = false;
141    }
142  11 MappingType type = getMappingType(gff[SOURCE_COL]);
143   
144  11 if (type == null)
145    {
146  0 throw new IOException("Sorry, I don't handle " + gff[SOURCE_COL]);
147    }
148   
149  11 if (mapTo == null || mapTo.size() != 1)
150    {
151  0 throw new IOException(
152    "Expecting exactly one sequence in Query or Target field (got "
153    + mapTo + ")");
154    }
155   
156    /*
157    * locate the mapped sequence in the alignment or 'new' (GFF file) sequences;
158    */
159  11 SequenceI mappedSequence = findSequence(mapTo.get(0), align, newseqs,
160    relaxedIdMatching);
161   
162    /*
163    * If mapping is from protein to dna, we store it as dna to protein instead
164    */
165  11 SequenceI mapFromSequence = seq;
166  11 SequenceI mapToSequence = mappedSequence;
167  11 if ((type == MappingType.NucleotideToPeptide && featureIsOnTarget)
168    || (type == MappingType.PeptideToNucleotide
169    && !featureIsOnTarget))
170    {
171  3 mapFromSequence = mappedSequence;
172  3 mapToSequence = seq;
173    }
174   
175    /*
176    * Process the Align maps and create mappings.
177    * These may be cdna-genome, cdna-protein, genome-protein.
178    * The mapped sequences may or may not be in the alignment
179    * (they may be included later in the GFF file).
180    */
181   
182    /*
183    * get any existing mapping for these sequences (or start one),
184    * and add this mapped range
185    */
186  11 AlignedCodonFrame acf = getMapping(align, mapFromSequence,
187    mapToSequence);
188   
189    /*
190    * exonerate GFF has the strand of the target in column 7
191    * (differs from GFF3 which has it in the Target descriptor)
192    */
193  11 String strand = gff[STRAND_COL];
194  11 boolean forwardStrand = true;
195  11 if ("-".equals(strand))
196    {
197  9 forwardStrand = false;
198    }
199  2 else if (!"+".equals(strand))
200    {
201  0 System.err.println("Strand must be specified for alignment");
202  0 return;
203    }
204   
205  11 List<String> alignedRegions = set.get(ALIGN);
206  11 for (String region : alignedRegions)
207    {
208  15 MapList mapping = buildMapping(region, type, forwardStrand,
209    featureIsOnTarget, gff);
210   
211  15 if (mapping == null)
212    {
213  0 continue;
214    }
215   
216  15 acf.addMap(mapFromSequence, mapToSequence, mapping);
217    }
218  11 align.addCodonFrame(acf);
219    }
220   
221    /**
222    * Construct the mapping
223    *
224    * @param region
225    * @param type
226    * @param forwardStrand
227    * @param featureIsOnTarget
228    * @param gff
229    * @return
230    */
 
231  15 toggle protected MapList buildMapping(String region, MappingType type,
232    boolean forwardStrand, boolean featureIsOnTarget, String[] gff)
233    {
234    /*
235    * process one "fromStart toStart fromCount" descriptor
236    */
237  15 String[] tokens = region.split(" ");
238  15 if (tokens.length != 3)
239    {
240  0 System.err.println("Malformed Align descriptor: " + region);
241  0 return null;
242    }
243   
244    /*
245    * get start/end of from/to mappings
246    * if feature is on the target sequence we have to invert the sense
247    */
248  15 int alignFromStart;
249  15 int alignToStart;
250  15 int alignCount;
251  15 try
252    {
253  15 alignFromStart = Integer.parseInt(tokens[0]);
254  15 alignToStart = Integer.parseInt(tokens[1]);
255  15 alignCount = Integer.parseInt(tokens[2]);
256    } catch (NumberFormatException nfe)
257    {
258  0 System.err.println(nfe.toString());
259  0 return null;
260    }
261   
262  15 int fromStart;
263  15 int fromEnd;
264  15 int toStart;
265  15 int toEnd;
266   
267  15 if (featureIsOnTarget)
268    {
269  12 fromStart = alignToStart;
270  12 toStart = alignFromStart;
271  12 toEnd = forwardStrand ? toStart + alignCount - 1
272    : toStart - (alignCount - 1);
273  12 int toLength = Math.abs(toEnd - toStart) + 1;
274  12 int fromLength = toLength * type.getFromRatio() / type.getToRatio();
275  12 fromEnd = fromStart + fromLength - 1;
276    }
277    else
278    {
279    // we use the 'Align' values here not the feature start/end
280    // not clear why they may differ but it seems they can
281  3 fromStart = alignFromStart;
282  3 fromEnd = alignFromStart + alignCount - 1;
283  3 int fromLength = fromEnd - fromStart + 1;
284  3 int toLength = fromLength * type.getToRatio() / type.getFromRatio();
285  3 toStart = alignToStart;
286  3 if (forwardStrand)
287    {
288  1 toEnd = toStart + toLength - 1;
289    }
290    else
291    {
292  2 toEnd = toStart - (toLength - 1);
293    }
294    }
295   
296  15 MapList codonmapping = constructMappingFromAlign(fromStart, fromEnd,
297    toStart, toEnd, type);
298  15 return codonmapping;
299    }
300   
301    /**
302    * Returns a MappingType depending on the exonerate 'model' value.
303    *
304    * @param model
305    * @return
306    */
 
307  18 toggle protected static MappingType getMappingType(String model)
308    {
309  18 MappingType result = null;
310   
311  18 if (model.contains(PROTEIN2DNA) || model.contains(PROTEIN2GENOME))
312    {
313  13 result = MappingType.PeptideToNucleotide;
314    }
315  5 else if (model.contains(CODING2CODING) || model.contains(CODING2GENOME)
316    || model.contains(CDNA2GENOME) || model.contains(GENOME2GENOME))
317    {
318  4 result = MappingType.NucleotideToNucleotide;
319    }
320  18 return result;
321    }
322   
323    /**
324    * Tests whether the GFF data looks like it was generated by exonerate, and is
325    * a format we are willing to handle
326    *
327    * @param columns
328    * @return
329    */
 
330  38 toggle public static boolean recognises(String[] columns)
331    {
332  38 if (!SIMILARITY.equalsIgnoreCase(columns[TYPE_COL]))
333    {
334  22 return false;
335    }
336   
337    /*
338    * inspect alignment model
339    */
340  16 String model = columns[SOURCE_COL];
341    // e.g. exonerate:protein2genome:local
342  16 if (model != null)
343    {
344  16 String mdl = model.toLowerCase();
345  16 if (mdl.contains(PROTEIN2DNA) || mdl.contains(PROTEIN2GENOME)
346    || mdl.contains(CODING2CODING) || mdl.contains(CODING2GENOME)
347    || mdl.contains(CDNA2GENOME) || mdl.contains(GENOME2GENOME))
348    {
349  14 return true;
350    }
351    }
352  2 System.err.println("Sorry, I don't handle exonerate model " + model);
353  2 return false;
354    }
355   
356    /**
357    * An override to set feature group to "exonerate" instead of the default GFF
358    * source value (column 2)
359    */
 
360  0 toggle @Override
361    protected SequenceFeature buildSequenceFeature(String[] gff,
362    Map<String, List<String>> set)
363    {
364  0 SequenceFeature sf = super.buildSequenceFeature(gff, TYPE_COL,
365    "exonerate", set);
366   
367  0 return sf;
368    }
369   
370    }