Clover icon

Coverage Report

  1. Project Clover database Mon Nov 18 2024 09:38:20 GMT
  2. Package jalview.io.gff

File ExonerateHelper.java

 

Coverage histogram

../../../img/srcFileCovDistChart0.png
54% of files have more coverage

Code metrics

32
88
6
1
375
202
37
0.42
14.67
6
6.17

Classes

Class Line # Actions
ExonerateHelper 39 88 37
0.00%
 

Contributing tests

No tests hitting this source file were found.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.io.gff;
22   
23    import java.util.Locale;
24   
25    import jalview.datamodel.AlignedCodonFrame;
26    import jalview.datamodel.AlignmentI;
27    import jalview.datamodel.MappingType;
28    import jalview.datamodel.SequenceFeature;
29    import jalview.datamodel.SequenceI;
30    import jalview.util.MapList;
31   
32    import java.io.IOException;
33    import java.util.List;
34    import java.util.Map;
35   
36    /**
37    * A handler to parse GFF in the format generated by the exonerate tool
38    */
 
39    public class ExonerateHelper extends Gff2Helper
40    {
41    private static final String SIMILARITY = "similarity"; // GFF type column value (case-insensitive) that recognises() requires
42    // exonerate model names; looked for as substrings of the GFF source column
43    private static final String GENOME2GENOME = "genome2genome";
44   
45    private static final String CDNA2GENOME = "cdna2genome";
46   
47    private static final String CODING2GENOME = "coding2genome";
48   
49    private static final String CODING2CODING = "coding2coding";
50   
51    private static final String PROTEIN2GENOME = "protein2genome";
52   
53    private static final String PROTEIN2DNA = "protein2dna";
54    // keys looked up in the parsed GFF attributes by processGffSimilarity()
55    private static final String ALIGN = "Align";
56   
57    private static final String QUERY = "Query";
58   
59    private static final String TARGET = "Target";
60   
61    /**
62    * Process one GFF feature line (as modelled by SequenceFeature)
63    *
64    * @param seq
65    * the sequence with which this feature is associated
66    * @param gffColumns
67    * the sequence feature with ATTRIBUTES property containing any
68    * additional attributes
69    * @param align
70    * the alignment we are adding GFF to
71    * @param newseqs
72    * any new sequences referenced by the GFF
73    * @param relaxedIdMatching
74    * if true, match word tokens in sequence names
75    * @return true if the sequence feature should be added to the sequence, else
76    * false (i.e. it has been processed in another way e.g. to generate a
77    * mapping)
78    */
 
79  0 toggle @Override
80    public SequenceFeature processGff(SequenceI seq, String[] gffColumns,
81    AlignmentI align, List<SequenceI> newseqs,
82    boolean relaxedIdMatching)
83    {
84  0 String attr = gffColumns[ATTRIBUTES_COL];
85  0 Map<String, List<String>> set = parseNameValuePairs(attr);
86   
87  0 try
88    {
89  0 processGffSimilarity(set, seq, gffColumns, align, newseqs,
90    relaxedIdMatching);
91    } catch (IOException ivfe)
92    {
93  0 jalview.bin.Console.errPrintln(ivfe);
94    }
95   
96    /*
97    * return null to indicate we don't want to add a sequence feature for
98    * similarity (only process it to create mappings)
99    */
100  0 return null;
101    }
102   
103    /**
104    * Processes the 'Query' (or 'Target') and 'Align' properties associated with
105    * an exonerate GFF similarity feature; these properties define the mapping of
106    * the annotated range to a related sequence.
107    *
108    * @param set
109    * parsed GFF column 9 key/value(s)
110    * @param seq
111    * the sequence the GFF feature is on
112    * @param gff
113    * the GFF column data
114    * @param align
115    * the alignment the sequence belongs to, where any new mappings
116    * should be added
117    * @param newseqs
118    * a list of new 'virtual sequences' generated while parsing GFF
119    * @param relaxedIdMatching
120    * if true allow fuzzy search for a matching target sequence
121    * @throws IOException
122    */
 
123  0 toggle protected void processGffSimilarity(Map<String, List<String>> set,
124    SequenceI seq, String[] gff, AlignmentI align,
125    List<SequenceI> newseqs, boolean relaxedIdMatching)
126    throws IOException
127    {
128    /*
129    * exonerate may be run with
130    * --showquerygff - outputs 'features on the query' e.g. (protein2genome)
131    * Target <dnaseqid> ; Align proteinStartPos dnaStartPos proteinCount
132    * --showtargetgff - outputs 'features on the target' e.g. (protein2genome)
133    * Query <proteinseqid> ; Align dnaStartPos proteinStartPos nucleotideCount
134    * where the Align spec may repeat
135    */
136    // TODO handle coding2coding and similar as well
137  0 boolean featureIsOnTarget = true;
138  0 List<String> mapTo = set.get(QUERY);
139  0 if (mapTo == null)
140    {
141  0 mapTo = set.get(TARGET);
142  0 featureIsOnTarget = false;
143    }
144  0 MappingType type = getMappingType(gff[SOURCE_COL]);
145   
146  0 if (type == null)
147    {
148  0 throw new IOException("Sorry, I don't handle " + gff[SOURCE_COL]);
149    }
150   
151  0 if (mapTo == null || mapTo.size() != 1)
152    {
153  0 throw new IOException(
154    "Expecting exactly one sequence in Query or Target field (got "
155    + mapTo + ")");
156    }
157   
158    /*
159    * locate the mapped sequence in the alignment or 'new' (GFF file) sequences;
160    */
161  0 SequenceI mappedSequence = findSequence(mapTo.get(0), align, newseqs,
162    relaxedIdMatching);
163   
164    /*
165    * If mapping is from protein to dna, we store it as dna to protein instead
166    */
167  0 SequenceI mapFromSequence = seq;
168  0 SequenceI mapToSequence = mappedSequence;
169  0 if ((type == MappingType.NucleotideToPeptide && featureIsOnTarget)
170    || (type == MappingType.PeptideToNucleotide
171    && !featureIsOnTarget))
172    {
173  0 mapFromSequence = mappedSequence;
174  0 mapToSequence = seq;
175    }
176   
177    /*
178    * Process the Align maps and create mappings.
179    * These may be cdna-genome, cdna-protein, genome-protein.
180    * The mapped sequences may or may not be in the alignment
181    * (they may be included later in the GFF file).
182    */
183   
184    /*
185    * get any existing mapping for these sequences (or start one),
186    * and add this mapped range
187    */
188  0 AlignedCodonFrame acf = getMapping(align, mapFromSequence,
189    mapToSequence);
190   
191    /*
192    * exonerate GFF has the strand of the target in column 7
193    * (differs from GFF3 which has it in the Target descriptor)
194    */
195  0 String strand = gff[STRAND_COL];
196  0 boolean forwardStrand = true;
197  0 if ("-".equals(strand))
198    {
199  0 forwardStrand = false;
200    }
201  0 else if (!"+".equals(strand))
202    {
203  0 jalview.bin.Console
204    .errPrintln("Strand must be specified for alignment");
205  0 return;
206    }
207   
208  0 List<String> alignedRegions = set.get(ALIGN);
209  0 for (String region : alignedRegions)
210    {
211  0 MapList mapping = buildMapping(region, type, forwardStrand,
212    featureIsOnTarget, gff);
213   
214  0 if (mapping == null)
215    {
216  0 continue;
217    }
218   
219  0 acf.addMap(mapFromSequence, mapToSequence, mapping);
220    }
221  0 align.addCodonFrame(acf);
222    }
223   
224    /**
225    * Construct the mapping
226    *
227    * @param region
228    * @param type
229    * @param forwardStrand
230    * @param featureIsOnTarget
231    * @param gff
232    * @return
233    */
 
234  0 toggle protected MapList buildMapping(String region, MappingType type,
235    boolean forwardStrand, boolean featureIsOnTarget, String[] gff)
236    {
237    /*
238    * process one "fromStart toStart fromCount" descriptor
239    */
240  0 String[] tokens = region.split(" ");
241  0 if (tokens.length != 3)
242    {
243  0 jalview.bin.Console
244    .errPrintln("Malformed Align descriptor: " + region);
245  0 return null;
246    }
247   
248    /*
249    * get start/end of from/to mappings
250    * if feature is on the target sequence we have to invert the sense
251    */
252  0 int alignFromStart;
253  0 int alignToStart;
254  0 int alignCount;
255  0 try
256    {
257  0 alignFromStart = Integer.parseInt(tokens[0]);
258  0 alignToStart = Integer.parseInt(tokens[1]);
259  0 alignCount = Integer.parseInt(tokens[2]);
260    } catch (NumberFormatException nfe)
261    {
262  0 jalview.bin.Console.errPrintln(nfe.toString());
263  0 return null;
264    }
265   
266  0 int fromStart;
267  0 int fromEnd;
268  0 int toStart;
269  0 int toEnd;
270   
271  0 if (featureIsOnTarget)
272    {
273  0 fromStart = alignToStart;
274  0 toStart = alignFromStart;
275  0 toEnd = forwardStrand ? toStart + alignCount - 1
276    : toStart - (alignCount - 1);
277  0 int toLength = Math.abs(toEnd - toStart) + 1;
278  0 int fromLength = toLength * type.getFromRatio() / type.getToRatio();
279  0 fromEnd = fromStart + fromLength - 1;
280    }
281    else
282    {
283    // we use the 'Align' values here not the feature start/end
284    // not clear why they may differ but it seems they can
285  0 fromStart = alignFromStart;
286  0 fromEnd = alignFromStart + alignCount - 1;
287  0 int fromLength = fromEnd - fromStart + 1;
288  0 int toLength = fromLength * type.getToRatio() / type.getFromRatio();
289  0 toStart = alignToStart;
290  0 if (forwardStrand)
291    {
292  0 toEnd = toStart + toLength - 1;
293    }
294    else
295    {
296  0 toEnd = toStart - (toLength - 1);
297    }
298    }
299   
300  0 MapList codonmapping = constructMappingFromAlign(fromStart, fromEnd,
301    toStart, toEnd, type);
302  0 return codonmapping;
303    }
304   
305    /**
306    * Returns a MappingType depending on the exonerate 'model' value.
307    *
308    * @param model
309    * @return
310    */
 
311  0 toggle protected static MappingType getMappingType(String model)
312    {
313  0 MappingType result = null;
314   
315  0 if (model.contains(PROTEIN2DNA) || model.contains(PROTEIN2GENOME))
316    {
317  0 result = MappingType.PeptideToNucleotide;
318    }
319  0 else if (model.contains(CODING2CODING) || model.contains(CODING2GENOME)
320    || model.contains(CDNA2GENOME) || model.contains(GENOME2GENOME))
321    {
322  0 result = MappingType.NucleotideToNucleotide;
323    }
324  0 return result;
325    }
326   
327    /**
328    * Tests whether the GFF data looks like it was generated by exonerate, and is
329    * a format we are willing to handle
330    *
331    * @param columns
332    * @return
333    */
 
334  0 toggle public static boolean recognises(String[] columns)
335    {
336  0 if (!SIMILARITY.equalsIgnoreCase(columns[TYPE_COL]))
337    {
338  0 return false;
339    }
340   
341    /*
342    * inspect alignment model
343    */
344  0 String model = columns[SOURCE_COL];
345    // e.g. exonerate:protein2genome:local
346  0 if (model != null)
347    {
348  0 String mdl = model.toLowerCase(Locale.ROOT);
349  0 if (mdl.contains(PROTEIN2DNA) || mdl.contains(PROTEIN2GENOME)
350    || mdl.contains(CODING2CODING) || mdl.contains(CODING2GENOME)
351    || mdl.contains(CDNA2GENOME) || mdl.contains(GENOME2GENOME))
352    {
353  0 return true;
354    }
355    }
356  0 jalview.bin.Console
357    .errPrintln("Sorry, I don't handle exonerate model " + model);
358  0 return false;
359    }
360   
361    /**
362    * An override to set feature group to "exonerate" instead of the default GFF
363    * source value (column 2)
364    */
 
365  0 toggle @Override
366    protected SequenceFeature buildSequenceFeature(String[] gff,
367    Map<String, List<String>> set)
368    {
369  0 SequenceFeature sf = super.buildSequenceFeature(gff, TYPE_COL,
370    "exonerate", set);
371   
372  0 return sf;
373    }
374   
375    }