Clover icon

Coverage Report

  1. Project Clover database Mon Nov 11 2024 17:27:16 GMT
  2. Package jalview.io.gff

File ExonerateHelper.java

 

Coverage histogram

../../../img/srcFileCovDistChart9.png
12% of files have more coverage

Code metrics

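Branches: 32
Statements: 88
Methods: 6
Classes: 1
LOC: 375
NCLOC: 202
Total complexity: 37
Complexity density: 0.42
Statements/Method: 14.67
Methods/Class: 6
Average method complexity: 6.17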

Classes

Class Line # Actions
ExonerateHelper 39 88 37
0.8492063 84.9%
 

Contributing tests

This file is covered by 14 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.io.gff;
22   
23    import java.util.Locale;
24   
25    import jalview.datamodel.AlignedCodonFrame;
26    import jalview.datamodel.AlignmentI;
27    import jalview.datamodel.MappingType;
28    import jalview.datamodel.SequenceFeature;
29    import jalview.datamodel.SequenceI;
30    import jalview.util.MapList;
31   
32    import java.io.IOException;
33    import java.util.List;
34    import java.util.Map;
35   
36    /**
37    * A handler to parse GFF in the format generated by the exonerate tool
38    */
 
39    public class ExonerateHelper extends Gff2Helper
40    {
41    private static final String SIMILARITY = "similarity";
42   
43    private static final String GENOME2GENOME = "genome2genome";
44   
45    private static final String CDNA2GENOME = "cdna2genome";
46   
47    private static final String CODING2GENOME = "coding2genome";
48   
49    private static final String CODING2CODING = "coding2coding";
50   
51    private static final String PROTEIN2GENOME = "protein2genome";
52   
53    private static final String PROTEIN2DNA = "protein2dna";
54   
55    private static final String ALIGN = "Align";
56   
57    private static final String QUERY = "Query";
58   
59    private static final String TARGET = "Target";
60   
61    /**
62    * Process one GFF feature line (as modelled by SequenceFeature)
63    *
64    * @param seq
65    * the sequence with which this feature is associated
66    * @param gffColumns
67    * the sequence feature with ATTRIBUTES property containing any
68    * additional attributes
69    * @param align
70    * the alignment we are adding GFF to
71    * @param newseqs
72    * any new sequences referenced by the GFF
73    * @param relaxedIdMatching
74    * if true, match word tokens in sequence names
75    * @return true if the sequence feature should be added to the sequence, else
76    * false (i.e. it has been processed in another way e.g. to generate a
77    * mapping)
78    */
 
79  7 toggle @Override
80    public SequenceFeature processGff(SequenceI seq, String[] gffColumns,
81    AlignmentI align, List<SequenceI> newseqs,
82    boolean relaxedIdMatching)
83    {
84  7 String attr = gffColumns[ATTRIBUTES_COL];
85  7 Map<String, List<String>> set = parseNameValuePairs(attr);
86   
87  7 try
88    {
89  7 processGffSimilarity(set, seq, gffColumns, align, newseqs,
90    relaxedIdMatching);
91    } catch (IOException ivfe)
92    {
93  0 jalview.bin.Console.errPrintln(ivfe);
94    }
95   
96    /*
97    * return null to indicate we don't want to add a sequence feature for
98    * similarity (only process it to create mappings)
99    */
100  7 return null;
101    }
102   
103    /**
104    * Processes the 'Query' (or 'Target') and 'Align' properties associated with
105    * an exonerate GFF similarity feature; these properties define the mapping of
106    * the annotated range to a related sequence.
107    *
108    * @param set
109    * parsed GFF column 9 key/value(s)
110    * @param seq
111    * the sequence the GFF feature is on
112    * @param gff
113    * the GFF column data
114    * @param align
115    * the alignment the sequence belongs to, where any new mappings
116    * should be added
117    * @param newseqs
118    * a list of new 'virtual sequences' generated while parsing GFF
119    * @param relaxedIdMatching
120    * if true allow fuzzy search for a matching target sequence
121    * @throws IOException
122    */
 
123  11 toggle protected void processGffSimilarity(Map<String, List<String>> set,
124    SequenceI seq, String[] gff, AlignmentI align,
125    List<SequenceI> newseqs, boolean relaxedIdMatching)
126    throws IOException
127    {
128    /*
129    * exonerate may be run with
130    * --showquerygff - outputs 'features on the query' e.g. (protein2genome)
131    * Target <dnaseqid> ; Align proteinStartPos dnaStartPos proteinCount
132    * --showtargetgff - outputs 'features on the target' e.g. (protein2genome)
133    * Query <proteinseqid> ; Align dnaStartPos proteinStartPos nucleotideCount
134    * where the Align spec may repeat
135    */
136    // TODO handle coding2coding and similar as well
137  11 boolean featureIsOnTarget = true;
138  11 List<String> mapTo = set.get(QUERY);
139  11 if (mapTo == null)
140    {
141  3 mapTo = set.get(TARGET);
142  3 featureIsOnTarget = false;
143    }
144  11 MappingType type = getMappingType(gff[SOURCE_COL]);
145   
146  11 if (type == null)
147    {
148  0 throw new IOException("Sorry, I don't handle " + gff[SOURCE_COL]);
149    }
150   
151  11 if (mapTo == null || mapTo.size() != 1)
152    {
153  0 throw new IOException(
154    "Expecting exactly one sequence in Query or Target field (got "
155    + mapTo + ")");
156    }
157   
158    /*
159    * locate the mapped sequence in the alignment or 'new' (GFF file) sequences;
160    */
161  11 SequenceI mappedSequence = findSequence(mapTo.get(0), align, newseqs,
162    relaxedIdMatching);
163   
164    /*
165    * If mapping is from protein to dna, we store it as dna to protein instead
166    */
167  11 SequenceI mapFromSequence = seq;
168  11 SequenceI mapToSequence = mappedSequence;
169  11 if ((type == MappingType.NucleotideToPeptide && featureIsOnTarget)
170    || (type == MappingType.PeptideToNucleotide
171    && !featureIsOnTarget))
172    {
173  3 mapFromSequence = mappedSequence;
174  3 mapToSequence = seq;
175    }
176   
177    /*
178    * Process the Align maps and create mappings.
179    * These may be cdna-genome, cdna-protein, genome-protein.
180    * The mapped sequences may or may not be in the alignment
181    * (they may be included later in the GFF file).
182    */
183   
184    /*
185    * get any existing mapping for these sequences (or start one),
186    * and add this mapped range
187    */
188  11 AlignedCodonFrame acf = getMapping(align, mapFromSequence,
189    mapToSequence);
190   
191    /*
192    * exonerate GFF has the strand of the target in column 7
193    * (differs from GFF3 which has it in the Target descriptor)
194    */
195  11 String strand = gff[STRAND_COL];
196  11 boolean forwardStrand = true;
197  11 if ("-".equals(strand))
198    {
199  9 forwardStrand = false;
200    }
201  2 else if (!"+".equals(strand))
202    {
203  0 jalview.bin.Console
204    .errPrintln("Strand must be specified for alignment");
205  0 return;
206    }
207   
208  11 List<String> alignedRegions = set.get(ALIGN);
209  11 for (String region : alignedRegions)
210    {
211  15 MapList mapping = buildMapping(region, type, forwardStrand,
212    featureIsOnTarget, gff);
213   
214  15 if (mapping == null)
215    {
216  0 continue;
217    }
218   
219  15 acf.addMap(mapFromSequence, mapToSequence, mapping);
220    }
221  11 align.addCodonFrame(acf);
222    }
223   
224    /**
225    * Construct the mapping
226    *
227    * @param region
228    * @param type
229    * @param forwardStrand
230    * @param featureIsOnTarget
231    * @param gff
232    * @return
233    */
 
234  15 toggle protected MapList buildMapping(String region, MappingType type,
235    boolean forwardStrand, boolean featureIsOnTarget, String[] gff)
236    {
237    /*
238    * process one "fromStart toStart fromCount" descriptor
239    */
240  15 String[] tokens = region.split(" ");
241  15 if (tokens.length != 3)
242    {
243  0 jalview.bin.Console
244    .errPrintln("Malformed Align descriptor: " + region);
245  0 return null;
246    }
247   
248    /*
249    * get start/end of from/to mappings
250    * if feature is on the target sequence we have to invert the sense
251    */
252  15 int alignFromStart;
253  15 int alignToStart;
254  15 int alignCount;
255  15 try
256    {
257  15 alignFromStart = Integer.parseInt(tokens[0]);
258  15 alignToStart = Integer.parseInt(tokens[1]);
259  15 alignCount = Integer.parseInt(tokens[2]);
260    } catch (NumberFormatException nfe)
261    {
262  0 jalview.bin.Console.errPrintln(nfe.toString());
263  0 return null;
264    }
265   
266  15 int fromStart;
267  15 int fromEnd;
268  15 int toStart;
269  15 int toEnd;
270   
271  15 if (featureIsOnTarget)
272    {
273  12 fromStart = alignToStart;
274  12 toStart = alignFromStart;
275  12 toEnd = forwardStrand ? toStart + alignCount - 1
276    : toStart - (alignCount - 1);
277  12 int toLength = Math.abs(toEnd - toStart) + 1;
278  12 int fromLength = toLength * type.getFromRatio() / type.getToRatio();
279  12 fromEnd = fromStart + fromLength - 1;
280    }
281    else
282    {
283    // we use the 'Align' values here not the feature start/end
284    // not clear why they may differ but it seems they can
285  3 fromStart = alignFromStart;
286  3 fromEnd = alignFromStart + alignCount - 1;
287  3 int fromLength = fromEnd - fromStart + 1;
288  3 int toLength = fromLength * type.getToRatio() / type.getFromRatio();
289  3 toStart = alignToStart;
290  3 if (forwardStrand)
291    {
292  1 toEnd = toStart + toLength - 1;
293    }
294    else
295    {
296  2 toEnd = toStart - (toLength - 1);
297    }
298    }
299   
300  15 MapList codonmapping = constructMappingFromAlign(fromStart, fromEnd,
301    toStart, toEnd, type);
302  15 return codonmapping;
303    }
304   
305    /**
306    * Returns a MappingType depending on the exonerate 'model' value.
307    *
308    * @param model
309    * @return
310    */
 
311  18 toggle protected static MappingType getMappingType(String model)
312    {
313  18 MappingType result = null;
314   
315  18 if (model.contains(PROTEIN2DNA) || model.contains(PROTEIN2GENOME))
316    {
317  13 result = MappingType.PeptideToNucleotide;
318    }
319  5 else if (model.contains(CODING2CODING) || model.contains(CODING2GENOME)
320    || model.contains(CDNA2GENOME) || model.contains(GENOME2GENOME))
321    {
322  4 result = MappingType.NucleotideToNucleotide;
323    }
324  18 return result;
325    }
326   
327    /**
328    * Tests whether the GFF data looks like it was generated by exonerate, and is
329    * a format we are willing to handle
330    *
331    * @param columns
332    * @return
333    */
 
334  38 toggle public static boolean recognises(String[] columns)
335    {
336  38 if (!SIMILARITY.equalsIgnoreCase(columns[TYPE_COL]))
337    {
338  22 return false;
339    }
340   
341    /*
342    * inspect alignment model
343    */
344  16 String model = columns[SOURCE_COL];
345    // e.g. exonerate:protein2genome:local
346  16 if (model != null)
347    {
348  16 String mdl = model.toLowerCase(Locale.ROOT);
349  16 if (mdl.contains(PROTEIN2DNA) || mdl.contains(PROTEIN2GENOME)
350    || mdl.contains(CODING2CODING) || mdl.contains(CODING2GENOME)
351    || mdl.contains(CDNA2GENOME) || mdl.contains(GENOME2GENOME))
352    {
353  14 return true;
354    }
355    }
356  2 jalview.bin.Console
357    .errPrintln("Sorry, I don't handle exonerate model " + model);
358  2 return false;
359    }
360   
361    /**
362    * An override to set feature group to "exonerate" instead of the default GFF
363    * source value (column 2)
364    */
 
365  0 toggle @Override
366    protected SequenceFeature buildSequenceFeature(String[] gff,
367    Map<String, List<String>> set)
368    {
369  0 SequenceFeature sf = super.buildSequenceFeature(gff, TYPE_COL,
370    "exonerate", set);
371   
372  0 return sf;
373    }
374   
375    }