Clover icon

jalviewX

  1. Project Clover database Wed Oct 31 2018 15:13:58 GMT
  2. Package jalview.io

File FeaturesFile.java

 

Coverage histogram

../../img/srcFileCovDistChart8.png
19% of files have more coverage

Code metrics

188
363
25
1
1,291
798
145
0.4
14.52
25
5.8

Classes

Class Line # Actions
FeaturesFile 71 363 145 129
0.7760417 (77.6%)
 

Contributing tests

This file is covered by 14 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.io;
22   
23    import jalview.analysis.AlignmentUtils;
24    import jalview.analysis.SequenceIdMatcher;
25    import jalview.api.AlignViewportI;
26    import jalview.api.FeatureColourI;
27    import jalview.api.FeaturesSourceI;
28    import jalview.datamodel.AlignedCodonFrame;
29    import jalview.datamodel.Alignment;
30    import jalview.datamodel.AlignmentI;
31    import jalview.datamodel.SequenceDummy;
32    import jalview.datamodel.SequenceFeature;
33    import jalview.datamodel.SequenceI;
34    import jalview.datamodel.features.FeatureMatcherSet;
35    import jalview.datamodel.features.FeatureMatcherSetI;
36    import jalview.io.gff.GffHelperBase;
37    import jalview.io.gff.GffHelperFactory;
38    import jalview.io.gff.GffHelperI;
39    import jalview.schemes.FeatureColour;
40    import jalview.util.ColorUtils;
41    import jalview.util.MapList;
42    import jalview.util.ParseHtmlBodyAndLinks;
43    import jalview.util.StringUtils;
44   
45    import java.awt.Color;
46    import java.io.IOException;
47    import java.util.ArrayList;
48    import java.util.Arrays;
49    import java.util.Collections;
50    import java.util.HashMap;
51    import java.util.List;
52    import java.util.Map;
53    import java.util.Map.Entry;
54   
55    /**
56    * Parses and writes features files, which may be in Jalview, GFF2 or GFF3
57    * format. These are tab-delimited formats but with differences in the use of
58    * columns.
59    *
60    * A Jalview feature file may define feature colours and then declare that the
61    * remainder of the file is in GFF format with the line 'GFF'.
62    *
63    * GFF3 files may include alignment mappings for features, which Jalview will
64    * attempt to model, and may include sequence data following a ##FASTA line.
65    *
66    *
67    * @author AMW
68    * @author jbprocter
69    * @author gmcarstairs
70    */
 
71    public class FeaturesFile extends AlignFile implements FeaturesSourceI
72    {
73    private static final String TAB_REGEX = "\\t";
74   
75    private static final String STARTGROUP = "STARTGROUP";
76   
77    private static final String ENDGROUP = "ENDGROUP";
78   
79    private static final String STARTFILTERS = "STARTFILTERS";
80   
81    private static final String ENDFILTERS = "ENDFILTERS";
82   
83    private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED";
84   
85    private static final String NOTE = "Note";
86   
87    protected static final String GFF_VERSION = "##gff-version";
88   
89    private AlignmentI lastmatchedAl = null;
90   
91    private SequenceIdMatcher matcher = null;
92   
93    protected AlignmentI dataset;
94   
95    protected int gffVersion;
96   
/**
 * Default constructor: creates a FeaturesFile without naming any input to
 * read.
 */
public FeaturesFile()
{
  super();
}
103   
/**
 * Creates a FeaturesFile for the given input without parsing it immediately
 * (the superclass constructor is passed 'false' for immediate parsing).
 *
 * @param file
 *          File or String filename
 * @param paste
 *          the type of data source that 'file' represents
 * @throws IOException
 */
public FeaturesFile(Object file, DataSourceType paste) throws IOException
{
  super(false, file, paste);
}
116   
/**
 * Creates a FeaturesFile from an existing FileParse, which is handed to the
 * superclass constructor.
 *
 * @param source
 *          the FileParse to construct this object from
 * @throws IOException
 */
public FeaturesFile(FileParse source) throws IOException
{
  super(source);
}
125   
/**
 * Creates a FeaturesFile for the given input, letting the caller choose
 * whether it is parsed immediately.
 *
 * @param parseImmediately
 *          passed to the superclass constructor to request (or not) an
 *          immediate parse
 * @param file
 *          File or String filename
 * @param type
 *          the type of data source that 'file' represents
 * @throws IOException
 */
public FeaturesFile(boolean parseImmediately, Object file,
        DataSourceType type) throws IOException
{
  super(parseImmediately, file, type);
}
139   
140    /**
141    * Parse GFF or sequence features file using case-independent matching,
142    * discarding URLs
143    *
144    * @param align
145    * - alignment/dataset containing sequences that are to be annotated
146    * @param colours
147    * - hashtable to store feature colour definitions
148    * @param removeHTML
149    * - process html strings into plain text
150    * @return true if features were added
151    */
 
152  5 toggle public boolean parse(AlignmentI align,
153    Map<String, FeatureColourI> colours, boolean removeHTML)
154    {
155  5 return parse(align, colours, removeHTML, false);
156    }
157   
158    /**
159    * Extends the default addProperties by also adding peptide-to-cDNA mappings
160    * (if any) derived while parsing a GFF file
161    */
 
162  2 toggle @Override
163    public void addProperties(AlignmentI al)
164    {
165  2 super.addProperties(al);
166  2 if (dataset != null && dataset.getCodonFrames() != null)
167    {
168  2 AlignmentI ds = (al.getDataset() == null) ? al : al.getDataset();
169  2 for (AlignedCodonFrame codons : dataset.getCodonFrames())
170    {
171  2 ds.addCodonFrame(codons);
172    }
173    }
174    }
175   
176    /**
177    * Parse GFF or Jalview format sequence features file
178    *
179    * @param align
180    * - alignment/dataset containing sequences that are to be annotated
181    * @param colours
182    * - map to store feature colour definitions
183    * @param removeHTML
184    * - process html strings into plain text
185    * @param relaxedIdmatching
186    * - when true, ID matches to compound sequence IDs are allowed
187    * @return true if features were added
188    */
 
189  9 toggle public boolean parse(AlignmentI align,
190    Map<String, FeatureColourI> colours, boolean removeHTML,
191    boolean relaxedIdmatching)
192    {
193  9 return parse(align, colours, null, removeHTML, relaxedIdmatching);
194    }
195   
196    /**
197    * Parse GFF or Jalview format sequence features file
198    *
199    * @param align
200    * - alignment/dataset containing sequences that are to be annotated
201    * @param colours
202    * - map to store feature colour definitions
203    * @param filters
204    * - map to store feature filter definitions
205    * @param removeHTML
206    * - process html strings into plain text
207    * @param relaxedIdmatching
208    * - when true, ID matches to compound sequence IDs are allowed
209    * @return true if features were added
210    */
 
211  11 toggle public boolean parse(AlignmentI align,
212    Map<String, FeatureColourI> colours,
213    Map<String, FeatureMatcherSetI> filters, boolean removeHTML,
214    boolean relaxedIdmatching)
215    {
216  11 Map<String, String> gffProps = new HashMap<>();
217    /*
218    * keep track of any sequences we try to create from the data
219    */
220  11 List<SequenceI> newseqs = new ArrayList<>();
221   
222  11 String line = null;
223  11 try
224    {
225  11 String[] gffColumns;
226  11 String featureGroup = null;
227   
228  ? while ((line = nextLine()) != null)
229    {
230    // skip comments/process pragmas
231  494 if (line.length() == 0 || line.startsWith("#"))
232    {
233  95 if (line.toLowerCase().startsWith("##"))
234    {
235  26 processGffPragma(line, gffProps, align, newseqs);
236    }
237  95 continue;
238    }
239   
240  399 gffColumns = line.split(TAB_REGEX);
241  399 if (gffColumns.length == 1)
242    {
243  2 if (line.trim().equalsIgnoreCase("GFF"))
244    {
245    /*
246    * Jalview features file with appended GFF
247    * assume GFF2 (though it may declare ##gff-version 3)
248    */
249  1 gffVersion = 2;
250  1 continue;
251    }
252    }
253   
254  398 if (gffColumns.length > 0 && gffColumns.length < 4)
255    {
256    /*
257    * if 2 or 3 tokens, we anticipate either 'startgroup', 'endgroup' or
258    * a feature type colour specification
259    */
260  42 String ft = gffColumns[0];
261  42 if (ft.equalsIgnoreCase(STARTFILTERS))
262    {
263  1 parseFilters(filters);
264  1 continue;
265    }
266  41 if (ft.equalsIgnoreCase(STARTGROUP))
267    {
268  5 featureGroup = gffColumns[1];
269    }
270  36 else if (ft.equalsIgnoreCase(ENDGROUP))
271    {
272    // We should check whether this is the current group,
273    // but at present there's no way of showing more than 1 group
274  5 featureGroup = null;
275    }
276    else
277    {
278  31 String colscheme = gffColumns[1];
279  31 FeatureColourI colour = FeatureColour
280    .parseJalviewFeatureColour(colscheme);
281  31 if (colour != null)
282    {
283  31 colours.put(ft, colour);
284    }
285    }
286  41 continue;
287    }
288   
289    /*
290    * if not a comment, GFF pragma, startgroup, endgroup or feature
291    * colour specification, that just leaves a feature details line
292    * in either Jalview or GFF format
293    */
294  356 if (gffVersion == 0)
295    {
296  330 parseJalviewFeature(line, gffColumns, align, colours, removeHTML,
297    relaxedIdmatching, featureGroup);
298    }
299    else
300    {
301  26 parseGff(gffColumns, align, relaxedIdmatching, newseqs);
302    }
303    }
304  11 resetMatcher();
305    } catch (Exception ex)
306    {
307    // should report somewhere useful for UI if necessary
308  0 warningMessage = ((warningMessage == null) ? "" : warningMessage)
309    + "Parsing error at\n" + line;
310  0 System.out.println("Error parsing feature file: " + ex + "\n" + line);
311  0 ex.printStackTrace(System.err);
312  0 resetMatcher();
313  0 return false;
314    }
315   
316    /*
317    * experimental - add any dummy sequences with features to the alignment
318    * - we need them for Ensembl feature extraction - though maybe not otherwise
319    */
320  11 for (SequenceI newseq : newseqs)
321    {
322  3 if (newseq.getFeatures().hasFeatures())
323    {
324  1 align.addSequence(newseq);
325    }
326    }
327  11 return true;
328    }
329   
330    /**
331    * Reads input lines from STARTFILTERS to ENDFILTERS and adds a feature type
332    * filter to the map for each line parsed. After exit from this method,
333    * nextLine() should return the line after ENDFILTERS (or we are already at
334    * end of file if ENDFILTERS was missing).
335    *
336    * @param filters
337    * @throws IOException
338    */
 
339  2 toggle protected void parseFilters(Map<String, FeatureMatcherSetI> filters)
340    throws IOException
341    {
342  2 String line;
343  ? while ((line = nextLine()) != null)
344    {
345  5 if (line.toUpperCase().startsWith(ENDFILTERS))
346    {
347  1 return;
348    }
349  4 String[] tokens = line.split(TAB_REGEX);
350  4 if (tokens.length != 2)
351    {
352  0 System.err.println(String.format("Invalid token count %d for %d",
353    tokens.length, line));
354    }
355    else
356    {
357  4 String featureType = tokens[0];
358  4 FeatureMatcherSetI fm = FeatureMatcherSet.fromString(tokens[1]);
359  4 if (fm != null && filters != null)
360    {
361  2 filters.put(featureType, fm);
362    }
363    }
364    }
365    }
366   
367    /**
368    * Try to parse a Jalview format feature specification and add it as a
369    * sequence feature to any matching sequences in the alignment. Returns true
370    * if successful (a feature was added), or false if not.
371    *
372    * @param line
373    * @param gffColumns
374    * @param alignment
375    * @param featureColours
376    * @param removeHTML
377    * @param relaxedIdmatching
378    * @param featureGroup
379    */
 
380  330 toggle protected boolean parseJalviewFeature(String line, String[] gffColumns,
381    AlignmentI alignment, Map<String, FeatureColourI> featureColours,
382    boolean removeHTML, boolean relaxedIdMatching,
383    String featureGroup)
384    {
385    /*
386    * tokens: description seqid seqIndex start end type [score]
387    */
388  330 if (gffColumns.length < 6)
389    {
390  0 System.err.println("Ignoring feature line '" + line
391    + "' with too few columns (" + gffColumns.length + ")");
392  0 return false;
393    }
394  330 String desc = gffColumns[0];
395  330 String seqId = gffColumns[1];
396  330 SequenceI seq = findSequence(seqId, alignment, null, relaxedIdMatching);
397   
398  330 if (!ID_NOT_SPECIFIED.equals(seqId))
399    {
400  329 seq = findSequence(seqId, alignment, null, relaxedIdMatching);
401    }
402    else
403    {
404  1 seqId = null;
405  1 seq = null;
406  1 String seqIndex = gffColumns[2];
407  1 try
408    {
409  1 int idx = Integer.parseInt(seqIndex);
410  1 seq = alignment.getSequenceAt(idx);
411    } catch (NumberFormatException ex)
412    {
413  0 System.err.println("Invalid sequence index: " + seqIndex);
414    }
415    }
416   
417  330 if (seq == null)
418    {
419  0 System.out.println("Sequence not found: " + line);
420  0 return false;
421    }
422   
423  330 int startPos = Integer.parseInt(gffColumns[3]);
424  330 int endPos = Integer.parseInt(gffColumns[4]);
425   
426  330 String ft = gffColumns[5];
427   
428  330 if (!featureColours.containsKey(ft))
429    {
430    /*
431    * Perhaps an old style groups file with no colours -
432    * synthesize a colour from the feature type
433    */
434  3 Color colour = ColorUtils.createColourFromName(ft);
435  3 featureColours.put(ft, new FeatureColour(colour));
436    }
437  330 SequenceFeature sf = null;
438  330 if (gffColumns.length > 6)
439    {
440  48 float score = Float.NaN;
441  48 try
442    {
443  48 score = new Float(gffColumns[6]).floatValue();
444    } catch (NumberFormatException ex)
445    {
446  0 sf = new SequenceFeature(ft, desc, startPos, endPos, featureGroup);
447    }
448  48 sf = new SequenceFeature(ft, desc, startPos, endPos, score,
449    featureGroup);
450    }
451    else
452    {
453  282 sf = new SequenceFeature(ft, desc, startPos, endPos, featureGroup);
454    }
455   
456  330 parseDescriptionHTML(sf, removeHTML);
457   
458  330 seq.addSequenceFeature(sf);
459   
460  ? while (seqId != null
461    && (seq = alignment.findName(seq, seqId, false)) != null)
462    {
463  0 seq.addSequenceFeature(new SequenceFeature(sf));
464    }
465  330 return true;
466    }
467   
468    /**
469    * clear any temporary handles used to speed up ID matching
470    */
 
471  11 toggle protected void resetMatcher()
472    {
473  11 lastmatchedAl = null;
474  11 matcher = null;
475    }
476   
477    /**
478    * Returns a sequence matching the given id, as follows
479    * <ul>
480    * <li>strict matching is on exact sequence name</li>
481    * <li>relaxed matching allows matching on a token within the sequence name,
482    * or a dbxref</li>
483    * <li>first tries to find a match in the alignment sequences</li>
484    * <li>else tries to find a match in the new sequences already generated while
485    * parsing the features file</li>
486    * <li>else creates a new placeholder sequence, adds it to the new sequences
487    * list, and returns it</li>
488    * </ul>
489    *
490    * @param seqId
491    * @param align
492    * @param newseqs
493    * @param relaxedIdMatching
494    *
495    * @return
496    */
 
497  685 toggle protected SequenceI findSequence(String seqId, AlignmentI align,
498    List<SequenceI> newseqs, boolean relaxedIdMatching)
499    {
500    // TODO encapsulate in SequenceIdMatcher, share the matcher
501    // with the GffHelper (removing code duplication)
502  685 SequenceI match = null;
503  685 if (relaxedIdMatching)
504    {
505  12 if (lastmatchedAl != align)
506    {
507  3 lastmatchedAl = align;
508  3 matcher = new SequenceIdMatcher(align.getSequencesArray());
509  3 if (newseqs != null)
510    {
511  3 matcher.addAll(newseqs);
512    }
513    }
514  12 match = matcher.findIdMatch(seqId);
515    }
516    else
517    {
518  673 match = align.findName(seqId, true);
519  673 if (match == null && newseqs != null)
520    {
521  9 for (SequenceI m : newseqs)
522    {
523  7 if (seqId.equals(m.getName()))
524    {
525  7 return m;
526    }
527    }
528    }
529   
530    }
531  678 if (match == null && newseqs != null)
532    {
533  5 match = new SequenceDummy(seqId);
534  5 if (relaxedIdMatching)
535    {
536  3 matcher.addAll(Arrays.asList(new SequenceI[] { match }));
537    }
538    // add dummy sequence to the newseqs list
539  5 newseqs.add(match);
540    }
541  678 return match;
542    }
543   
 
544  330 toggle public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)
545    {
546  330 if (sf.getDescription() == null)
547    {
548  0 return;
549    }
550  330 ParseHtmlBodyAndLinks parsed = new ParseHtmlBodyAndLinks(
551    sf.getDescription(), removeHTML, newline);
552   
553  330 if (removeHTML)
554    {
555  326 sf.setDescription(parsed.getNonHtmlContent());
556    }
557   
558  330 for (String link : parsed.getLinks())
559    {
560  106 sf.addLink(link);
561    }
562    }
563   
564    /**
565    * Returns contents of a Jalview format features file, for visible features, as
566    * filtered by type and group. Features with a null group are displayed if their
567    * feature type is visible. Non-positional features may optionally be included
568    * (with no check on type or group).
569    *
570    * @param sequences
571    * source of features
572    * @param visible
573    * map of colour for each visible feature type
574    * @param featureFilters
575    * @param visibleFeatureGroups
576    * @param includeNonPositional
577    * if true, include non-positional features (regardless of group or
578    * type)
579    * @return
580    */
 
581  4 toggle public String printJalviewFormat(SequenceI[] sequences,
582    Map<String, FeatureColourI> visible,
583    Map<String, FeatureMatcherSetI> featureFilters,
584    List<String> visibleFeatureGroups, boolean includeNonPositional)
585    {
586  4 if (!includeNonPositional && (visible == null || visible.isEmpty()))
587    {
588    // no point continuing.
589  1 return "No Features Visible";
590    }
591   
592    /*
593    * write out feature colours (if we know them)
594    */
595    // TODO: decide if feature links should also be written here ?
596  3 StringBuilder out = new StringBuilder(256);
597  3 if (visible != null)
598    {
599  3 for (Entry<String, FeatureColourI> featureColour : visible.entrySet())
600    {
601  5 FeatureColourI colour = featureColour.getValue();
602  5 out.append(colour.toJalviewFormat(featureColour.getKey())).append(
603    newline);
604    }
605    }
606   
607  3 String[] types = visible == null ? new String[0] : visible.keySet()
608    .toArray(new String[visible.keySet().size()]);
609   
610    /*
611    * feature filters if any
612    */
613  3 outputFeatureFilters(out, visible, featureFilters);
614   
615    /*
616    * sort groups alphabetically, and ensure that features with a
617    * null or empty group are output after those in named groups
618    */
619  3 List<String> sortedGroups = new ArrayList<>(visibleFeatureGroups);
620  3 sortedGroups.remove(null);
621  3 sortedGroups.remove("");
622  3 Collections.sort(sortedGroups);
623  3 sortedGroups.add(null);
624  3 sortedGroups.add("");
625   
626  3 boolean foundSome = false;
627   
628    /*
629    * first output any non-positional features
630    */
631  3 if (includeNonPositional)
632    {
633  16 for (int i = 0; i < sequences.length; i++)
634    {
635  15 String sequenceName = sequences[i].getName();
636  15 for (SequenceFeature feature : sequences[i].getFeatures()
637    .getNonPositionalFeatures())
638    {
639  3 foundSome = true;
640  3 out.append(formatJalviewFeature(sequenceName, feature));
641    }
642    }
643    }
644   
645    /*
646    * positional features within groups
647    */
648  3 foundSome |= outputFeaturesByGroup(out, sortedGroups, types, sequences);
649   
650  3 return foundSome ? out.toString() : "No Features Visible";
651    }
652   
653    /**
654    * Outputs any feature filters defined for visible feature types, sandwiched by
655    * STARTFILTERS and ENDFILTERS lines
656    *
657    * @param out
658    * @param visible
659    * @param featureFilters
660    */
 
661  6 toggle void outputFeatureFilters(StringBuilder out,
662    Map<String, FeatureColourI> visible,
663    Map<String, FeatureMatcherSetI> featureFilters)
664    {
665  6 if (visible == null || featureFilters == null
666    || featureFilters.isEmpty())
667    {
668  4 return;
669    }
670   
671  2 boolean first = true;
672  2 for (String featureType : visible.keySet())
673    {
674  3 FeatureMatcherSetI filter = featureFilters.get(featureType);
675  3 if (filter != null)
676    {
677  2 if (first)
678    {
679  1 first = false;
680  1 out.append(newline).append(STARTFILTERS).append(newline);
681    }
682  2 out.append(featureType).append(TAB).append(filter.toStableString())
683    .append(newline);
684    }
685    }
686  2 if (!first)
687    {
688  1 out.append(ENDFILTERS).append(newline).append(newline);
689    }
690   
691    }
692   
693    /**
694    * Appends output of sequence features within feature groups to the output
695    * buffer. Groups other than the null or empty group are sandwiched by
696    * STARTGROUP and ENDGROUP lines.
697    *
698    * @param out
699    * @param groups
700    * @param featureTypes
701    * @param sequences
702    * @return
703    */
 
704  3 toggle private boolean outputFeaturesByGroup(StringBuilder out,
705    List<String> groups, String[] featureTypes, SequenceI[] sequences)
706    {
707  3 boolean foundSome = false;
708  3 for (String group : groups)
709    {
710  9 boolean isNamedGroup = (group != null && !"".equals(group));
711  9 if (isNamedGroup)
712    {
713  3 out.append(newline);
714  3 out.append(STARTGROUP).append(TAB);
715  3 out.append(group);
716  3 out.append(newline);
717    }
718   
719    /*
720    * output positional features within groups
721    */
722  144 for (int i = 0; i < sequences.length; i++)
723    {
724  135 String sequenceName = sequences[i].getName();
725  135 List<SequenceFeature> features = new ArrayList<>();
726  135 if (featureTypes.length > 0)
727    {
728  90 features.addAll(sequences[i].getFeatures().getFeaturesForGroup(
729    true, group, featureTypes));
730    }
731   
732  135 for (SequenceFeature sequenceFeature : features)
733    {
734  7 foundSome = true;
735  7 out.append(formatJalviewFeature(sequenceName, sequenceFeature));
736    }
737    }
738   
739  9 if (isNamedGroup)
740    {
741  3 out.append(ENDGROUP).append(TAB);
742  3 out.append(group);
743  3 out.append(newline);
744    }
745    }
746  3 return foundSome;
747    }
748   
749    /**
750    * @param out
751    * @param sequenceName
752    * @param sequenceFeature
753    */
 
754  10 toggle protected String formatJalviewFeature(
755    String sequenceName, SequenceFeature sequenceFeature)
756    {
757  10 StringBuilder out = new StringBuilder(64);
758  10 if (sequenceFeature.description == null
759    || sequenceFeature.description.equals(""))
760    {
761  0 out.append(sequenceFeature.type).append(TAB);
762    }
763    else
764    {
765  10 if (sequenceFeature.links != null
766    && sequenceFeature.getDescription().indexOf("<html>") == -1)
767    {
768  0 out.append("<html>");
769    }
770   
771  10 out.append(sequenceFeature.description);
772  10 if (sequenceFeature.links != null)
773    {
774  2 for (int l = 0; l < sequenceFeature.links.size(); l++)
775    {
776  1 String label = sequenceFeature.links.elementAt(l);
777  1 String href = label.substring(label.indexOf("|") + 1);
778  1 label = label.substring(0, label.indexOf("|"));
779   
780  1 if (sequenceFeature.description.indexOf(href) == -1)
781    {
782  0 out.append(" <a href=\"" + href + "\">" + label + "</a>");
783    }
784    }
785   
786  1 if (sequenceFeature.getDescription().indexOf("</html>") == -1)
787    {
788  0 out.append("</html>");
789    }
790    }
791   
792  10 out.append(TAB);
793    }
794  10 out.append(sequenceName);
795  10 out.append("\t-1\t");
796  10 out.append(sequenceFeature.begin);
797  10 out.append(TAB);
798  10 out.append(sequenceFeature.end);
799  10 out.append(TAB);
800  10 out.append(sequenceFeature.type);
801  10 if (!Float.isNaN(sequenceFeature.score))
802    {
803  8 out.append(TAB);
804  8 out.append(sequenceFeature.score);
805    }
806  10 out.append(newline);
807   
808  10 return out.toString();
809    }
810   
811    /**
812    * Parse method that is called when a GFF file is dragged to the desktop
813    */
 
814  2 toggle @Override
815    public void parse()
816    {
817  2 AlignViewportI av = getViewport();
818  2 if (av != null)
819    {
820  0 if (av.getAlignment() != null)
821    {
822  0 dataset = av.getAlignment().getDataset();
823    }
824  0 if (dataset == null)
825    {
826    // working in the applet context ?
827  0 dataset = av.getAlignment();
828    }
829    }
830    else
831    {
832  2 dataset = new Alignment(new SequenceI[] {});
833    }
834   
835  2 Map<String, FeatureColourI> featureColours = new HashMap<>();
836  2 boolean parseResult = parse(dataset, featureColours, false, true);
837  2 if (!parseResult)
838    {
839    // pass error up somehow
840    }
841  2 if (av != null)
842    {
843    // update viewport with the dataset data ?
844    }
845    else
846    {
847  2 setSeqs(dataset.getSequencesArray());
848    }
849    }
850   
851    /**
852    * Implementation of unused abstract method
853    *
854    * @return error message
855    */
 
856  0 toggle @Override
857    public String print(SequenceI[] sqs, boolean jvsuffix)
858    {
859  0 System.out.println("Use printGffFormat() or printJalviewFormat()");
860  0 return null;
861    }
862   
863    /**
864    * Returns features output in GFF2 format
865    *
866    * @param sequences
867    * the sequences whose features are to be output
868    * @param visible
869    * a map whose keys are the type names of visible features
870    * @param visibleFeatureGroups
871    * @param includeNonPositionalFeatures
872    * @return
873    */
 
874  7 toggle public String printGffFormat(SequenceI[] sequences,
875    Map<String, FeatureColourI> visible,
876    List<String> visibleFeatureGroups,
877    boolean includeNonPositionalFeatures)
878    {
879  7 StringBuilder out = new StringBuilder(256);
880   
881  7 out.append(String.format("%s %d\n", GFF_VERSION, gffVersion == 0 ? 2 : gffVersion));
882   
883  7 if (!includeNonPositionalFeatures
884    && (visible == null || visible.isEmpty()))
885    {
886  2 return out.toString();
887    }
888   
889  5 String[] types = visible == null ? new String[0] : visible.keySet()
890    .toArray(
891    new String[visible.keySet().size()]);
892   
893  5 for (SequenceI seq : sequences)
894    {
895  75 List<SequenceFeature> features = new ArrayList<>();
896  75 if (includeNonPositionalFeatures)
897    {
898  30 features.addAll(seq.getFeatures().getNonPositionalFeatures());
899    }
900  75 if (visible != null && !visible.isEmpty())
901    {
902  45 features.addAll(seq.getFeatures().getPositionalFeatures(types));
903    }
904   
905  75 for (SequenceFeature sf : features)
906    {
907  8 String source = sf.featureGroup;
908  8 if (!sf.isNonPositional() && source != null
909    && !visibleFeatureGroups.contains(source))
910    {
911    // group is not visible
912  1 continue;
913    }
914   
915  7 if (source == null)
916    {
917  3 source = sf.getDescription();
918    }
919   
920  7 out.append(seq.getName());
921  7 out.append(TAB);
922  7 out.append(source);
923  7 out.append(TAB);
924  7 out.append(sf.type);
925  7 out.append(TAB);
926  7 out.append(sf.begin);
927  7 out.append(TAB);
928  7 out.append(sf.end);
929  7 out.append(TAB);
930  7 out.append(sf.score);
931  7 out.append(TAB);
932   
933  7 int strand = sf.getStrand();
934  6 out.append(strand == 1 ? "+" : (strand == -1 ? "-" : "."));
935  7 out.append(TAB);
936   
937  7 String phase = sf.getPhase();
938  7 out.append(phase == null ? "." : phase);
939   
940    // miscellaneous key-values (GFF column 9)
941  7 String attributes = sf.getAttributes();
942  7 if (attributes != null)
943    {
944  1 out.append(TAB).append(attributes);
945    }
946   
947  7 out.append(newline);
948    }
949    }
950   
951  5 return out.toString();
952    }
953   
954    /**
955    * Returns a mapping given list of one or more Align descriptors (exonerate
956    * format)
957    *
958    * @param alignedRegions
959    * a list of "Align fromStart toStart fromCount"
960    * @param mapIsFromCdna
961    * if true, 'from' is dna, else 'from' is protein
962    * @param strand
963    * either 1 (forward) or -1 (reverse)
964    * @return
965    * @throws IOException
966    */
 
967  0 toggle protected MapList constructCodonMappingFromAlign(
968    List<String> alignedRegions, boolean mapIsFromCdna, int strand)
969    throws IOException
970    {
971  0 if (strand == 0)
972    {
973  0 throw new IOException(
974    "Invalid strand for a codon mapping (cannot be 0)");
975    }
976  0 int regions = alignedRegions.size();
977    // arrays to hold [start, end] for each aligned region
978  0 int[] fromRanges = new int[regions * 2]; // from dna
979  0 int[] toRanges = new int[regions * 2]; // to protein
980  0 int fromRangesIndex = 0;
981  0 int toRangesIndex = 0;
982   
983  0 for (String range : alignedRegions)
984    {
985    /*
986    * Align mapFromStart mapToStart mapFromCount
987    * e.g. if mapIsFromCdna
988    * Align 11270 143 120
989    * means:
990    * 120 bases from pos 11270 align to pos 143 in peptide
991    * if !mapIsFromCdna this would instead be
992    * Align 143 11270 40
993    */
994  0 String[] tokens = range.split(" ");
995  0 if (tokens.length != 3)
996    {
997  0 throw new IOException("Wrong number of fields for Align");
998    }
999  0 int fromStart = 0;
1000  0 int toStart = 0;
1001  0 int fromCount = 0;
1002  0 try
1003    {
1004  0 fromStart = Integer.parseInt(tokens[0]);
1005  0 toStart = Integer.parseInt(tokens[1]);
1006  0 fromCount = Integer.parseInt(tokens[2]);
1007    } catch (NumberFormatException nfe)
1008    {
1009  0 throw new IOException(
1010    "Invalid number in Align field: " + nfe.getMessage());
1011    }
1012   
1013    /*
1014    * Jalview always models from dna to protein, so adjust values if the
1015    * GFF mapping is from protein to dna
1016    */
1017  0 if (!mapIsFromCdna)
1018    {
1019  0 fromCount *= 3;
1020  0 int temp = fromStart;
1021  0 fromStart = toStart;
1022  0 toStart = temp;
1023    }
1024  0 fromRanges[fromRangesIndex++] = fromStart;
1025  0 fromRanges[fromRangesIndex++] = fromStart + strand * (fromCount - 1);
1026   
1027    /*
1028    * If a codon has an intron gap, there will be contiguous 'toRanges';
1029    * this is handled for us by the MapList constructor.
1030    * (It is not clear that exonerate ever generates this case)
1031    */
1032  0 toRanges[toRangesIndex++] = toStart;
1033  0 toRanges[toRangesIndex++] = toStart + (fromCount - 1) / 3;
1034    }
1035   
1036  0 return new MapList(fromRanges, toRanges, 3, 1);
1037    }
1038   
1039    /**
1040    * Parse a GFF format feature. This may include creating a 'dummy' sequence to
1041    * hold the feature, or for its mapped sequence, or both, to be resolved
1042    * either later in the GFF file (##FASTA section), or when the user loads
1043    * additional sequences.
1044    *
1045    * @param gffColumns
1046    * @param alignment
1047    * @param relaxedIdMatching
1048    * @param newseqs
1049    * @return
1050    */
 
1051  26 toggle protected SequenceI parseGff(String[] gffColumns, AlignmentI alignment,
1052    boolean relaxedIdMatching, List<SequenceI> newseqs)
1053    {
1054    /*
1055    * GFF: seqid source type start end score strand phase [attributes]
1056    */
1057  26 if (gffColumns.length < 5)
1058    {
1059  0 System.err.println("Ignoring GFF feature line with too few columns ("
1060    + gffColumns.length + ")");
1061  0 return null;
1062    }
1063   
1064    /*
1065    * locate referenced sequence in alignment _or_
1066    * as a forward or external reference (SequenceDummy)
1067    */
1068  26 String seqId = gffColumns[0];
1069  26 SequenceI seq = findSequence(seqId, alignment, newseqs,
1070    relaxedIdMatching);
1071   
1072  26 SequenceFeature sf = null;
1073  26 GffHelperI helper = GffHelperFactory.getHelper(gffColumns);
1074  26 if (helper != null)
1075    {
1076  26 try
1077    {
1078  26 sf = helper.processGff(seq, gffColumns, alignment, newseqs,
1079    relaxedIdMatching);
1080  26 if (sf != null)
1081    {
1082  19 seq.addSequenceFeature(sf);
1083  ? while ((seq = alignment.findName(seq, seqId, true)) != null)
1084    {
1085  0 seq.addSequenceFeature(new SequenceFeature(sf));
1086    }
1087    }
1088    } catch (IOException e)
1089    {
1090  0 System.err.println("GFF parsing failed with: " + e.getMessage());
1091  0 return null;
1092    }
1093    }
1094   
1095  26 return seq;
1096    }
1097   
1098    /**
1099    * Process the 'column 9' data of the GFF file. This is less formally defined,
1100    * and its interpretation will vary depending on the tool that has generated
1101    * it.
1102    *
1103    * @param attributes
1104    * @param sf
1105    */
 
1106  0 toggle protected void processGffColumnNine(String attributes, SequenceFeature sf)
1107    {
1108  0 sf.setAttributes(attributes);
1109   
1110    /*
1111    * Parse attributes in column 9 and add them to the sequence feature's
1112    * 'otherData' table; use Note as a best proxy for description
1113    */
1114  0 char nameValueSeparator = gffVersion == 3 ? '=' : ' ';
1115    // TODO check we don't break GFF2 values which include commas here
1116  0 Map<String, List<String>> nameValues = GffHelperBase
1117    .parseNameValuePairs(attributes, ";", nameValueSeparator, ",");
1118  0 for (Entry<String, List<String>> attr : nameValues.entrySet())
1119    {
1120  0 String values = StringUtils.listToDelimitedString(attr.getValue(),
1121    "; ");
1122  0 sf.setValue(attr.getKey(), values);
1123  0 if (NOTE.equals(attr.getKey()))
1124    {
1125  0 sf.setDescription(values);
1126    }
1127    }
1128    }
1129   
1130    /**
1131    * After encountering ##fasta in a GFF3 file, process the remainder of the
1132    * file as FAST sequence data. Any placeholder sequences created during
1133    * feature parsing are updated with the actual sequences.
1134    *
1135    * @param align
1136    * @param newseqs
1137    * @throws IOException
1138    */
 
1139  4 toggle protected void processAsFasta(AlignmentI align, List<SequenceI> newseqs)
1140    throws IOException
1141    {
1142  4 try
1143    {
1144  4 mark();
1145    } catch (IOException q)
1146    {
1147    }
1148  4 FastaFile parser = new FastaFile(this);
1149  4 List<SequenceI> includedseqs = parser.getSeqs();
1150   
1151  4 SequenceIdMatcher smatcher = new SequenceIdMatcher(newseqs);
1152   
1153    /*
1154    * iterate over includedseqs, and replacing matching ones with newseqs
1155    * sequences. Generic iterator not used here because we modify
1156    * includedseqs as we go
1157    */
1158  12 for (int p = 0, pSize = includedseqs.size(); p < pSize; p++)
1159    {
1160    // search for any dummy seqs that this sequence can be used to update
1161  8 SequenceI includedSeq = includedseqs.get(p);
1162  8 SequenceI dummyseq = smatcher.findIdMatch(includedSeq);
1163  8 if (dummyseq != null && dummyseq instanceof SequenceDummy)
1164    {
1165    // probably have the pattern wrong
1166    // idea is that a flyweight proxy for a sequence ID can be created for
1167    // 1. stable reference creation
1168    // 2. addition of annotation
1169    // 3. future replacement by a real sequence
1170    // current pattern is to create SequenceDummy objects - a convenience
1171    // constructor for a Sequence.
1172    // problem is that when promoted to a real sequence, all references
1173    // need to be updated somehow. We avoid that by keeping the same object.
1174  8 ((SequenceDummy) dummyseq).become(includedSeq);
1175  8 dummyseq.createDatasetSequence();
1176   
1177    /*
1178    * Update mappings so they are now to the dataset sequence
1179    */
1180  8 for (AlignedCodonFrame mapping : align.getCodonFrames())
1181    {
1182  8 mapping.updateToDataset(dummyseq);
1183    }
1184   
1185    /*
1186    * replace parsed sequence with the realised forward reference
1187    */
1188  8 includedseqs.set(p, dummyseq);
1189   
1190    /*
1191    * and remove from the newseqs list
1192    */
1193  8 newseqs.remove(dummyseq);
1194    }
1195    }
1196   
1197    /*
1198    * finally add sequences to the dataset
1199    */
1200  4 for (SequenceI seq : includedseqs)
1201    {
1202    // experimental: mapping-based 'alignment' to query sequence
1203  8 AlignmentUtils.alignSequenceAs(seq, align,
1204    String.valueOf(align.getGapCharacter()), false, true);
1205   
1206    // rename sequences if GFF handler requested this
1207    // TODO a more elegant way e.g. gffHelper.postProcess(newseqs) ?
1208  8 List<SequenceFeature> sfs = seq.getFeatures().getPositionalFeatures();
1209  8 if (!sfs.isEmpty())
1210    {
1211  4 String newName = (String) sfs.get(0).getValue(
1212    GffHelperI.RENAME_TOKEN);
1213  4 if (newName != null)
1214    {
1215  0 seq.setName(newName);
1216    }
1217    }
1218  8 align.addSequence(seq);
1219    }
1220    }
1221   
1222    /**
1223    * Process a ## directive
1224    *
1225    * @param line
1226    * @param gffProps
1227    * @param align
1228    * @param newseqs
1229    * @throws IOException
1230    */
 
1231  26 toggle protected void processGffPragma(String line, Map<String, String> gffProps,
1232    AlignmentI align, List<SequenceI> newseqs) throws IOException
1233    {
1234  26 line = line.trim();
1235  26 if ("###".equals(line))
1236    {
1237    // close off any open 'forward references'
1238  0 return;
1239    }
1240   
1241  26 String[] tokens = line.substring(2).split(" ");
1242  26 String pragma = tokens[0];
1243  26 String value = tokens.length == 1 ? null : tokens[1];
1244   
1245  26 if ("gff-version".equalsIgnoreCase(pragma))
1246    {
1247  7 if (value != null)
1248    {
1249  7 try
1250    {
1251    // value may be e.g. "3.1.2"
1252  7 gffVersion = Integer.parseInt(value.split("\\.")[0]);
1253    } catch (NumberFormatException e)
1254    {
1255    // ignore
1256    }
1257    }
1258    }
1259  19 else if ("sequence-region".equalsIgnoreCase(pragma))
1260    {
1261    // could capture <seqid start end> if wanted here
1262    }
1263  19 else if ("feature-ontology".equalsIgnoreCase(pragma))
1264    {
1265    // should resolve against the specified feature ontology URI
1266    }
1267  19 else if ("attribute-ontology".equalsIgnoreCase(pragma))
1268    {
1269    // URI of attribute ontology - not currently used in GFF3
1270    }
1271  19 else if ("source-ontology".equalsIgnoreCase(pragma))
1272    {
1273    // URI of source ontology - not currently used in GFF3
1274    }
1275  19 else if ("species-build".equalsIgnoreCase(pragma))
1276    {
1277    // save URI of specific NCBI taxon version of annotations
1278  0 gffProps.put("species-build", value);
1279    }
1280  19 else if ("fasta".equalsIgnoreCase(pragma))
1281    {
1282    // process the rest of the file as a fasta file and replace any dummy
1283    // sequence IDs
1284  4 processAsFasta(align, newseqs);
1285    }
1286    else
1287    {
1288  15 System.err.println("Ignoring unknown pragma: " + line);
1289    }
1290    }
1291    }