Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
InterProScanHelper | 35 | 18 | 11 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.io.gff; | |
22 | ||
23 | import jalview.datamodel.AlignmentI; | |
24 | import jalview.datamodel.SequenceFeature; | |
25 | import jalview.datamodel.SequenceI; | |
26 | import jalview.util.StringUtils; | |
27 | ||
28 | import java.io.IOException; | |
29 | import java.util.List; | |
30 | import java.util.Map; | |
31 | ||
32 | /** | |
33 | * A handler to parse GFF in the format generated by InterProScan | |
34 | */ | |
35 | public class InterProScanHelper extends Gff3Helper | |
36 | { | |
37 | private static final String INTER_PRO_SCAN = "InterProScan"; | |
38 | ||
39 | private static final String SIGNATURE_DESC = "signature_desc"; | |
40 | ||
41 | /** | |
42 | * Process one GFF feature line (as modelled by SequenceFeature) | |
43 | * | |
44 | * @param seq | |
45 | * the sequence with which this feature is associated | |
46 | * @param gff | |
47 | * the gff column data | |
48 | * @param align | |
49 | * the alignment we are adding GFF to | |
50 | * @param newseqs | |
51 | * any new sequences referenced by the GFF | |
52 | * @param relaxedIdMatching | |
53 | * if true, match word tokens in sequence names | |
54 | * @return a sequence feature if one should be added to the sequence, else | |
55 | * null (i.e. it has been processed in another way e.g. to generate a | |
56 | * mapping) | |
57 | * @throws IOException | |
58 | */ | |
59 | 0 | @Override |
60 | public SequenceFeature processGff(SequenceI seq, String[] gff, | |
61 | AlignmentI align, List<SequenceI> newseqs, | |
62 | boolean relaxedIdMatching) throws IOException | |
63 | { | |
64 | /* | |
65 | * ignore the 'polypeptide' match of the whole sequence | |
66 | */ | |
67 | 0 | if (".".equals(gff[SOURCE_COL])) |
68 | { | |
69 | 0 | return null; |
70 | } | |
71 | ||
72 | 0 | return super.processGff(seq, gff, align, newseqs, relaxedIdMatching); |
73 | } | |
74 | ||
75 | /** | |
76 | * An override that | |
77 | * <ul> | |
78 | * <li>uses Source (column 2) as feature type instead of the default column | |
79 | * 3</li> | |
80 | * <li>sets "InterProScan" as the feature group</li> | |
81 | * <li>extracts "signature_desc" attribute as the feature description</li> | |
82 | * </ul> | |
83 | */ | |
84 | 1 | @Override |
85 | protected SequenceFeature buildSequenceFeature(String[] gff, | |
86 | Map<String, List<String>> attributes) | |
87 | { | |
88 | 1 | SequenceFeature sf = super.buildSequenceFeature(gff, SOURCE_COL, |
89 | INTER_PRO_SCAN, attributes); | |
90 | ||
91 | /* | |
92 | * signature_desc is a more informative source of description | |
93 | */ | |
94 | 1 | List<String> desc = attributes.get(SIGNATURE_DESC); |
95 | 1 | String description = StringUtils.listToDelimitedString(desc, ", "); |
96 | 1 | if (description.length() > 0) |
97 | { | |
98 | 1 | sf.setDescription(description); |
99 | } | |
100 | ||
101 | 1 | return sf; |
102 | } | |
103 | ||
104 | /** | |
105 | * Tests whether the GFF data looks like it was generated by InterProScan | |
106 | * | |
107 | * @param columns | |
108 | * @return | |
109 | */ | |
110 | 24 | public static boolean recognises(String[] columns) |
111 | { | |
112 | 24 | SequenceOntologyI so = SequenceOntologyFactory.getInstance(); |
113 | 24 | String type = columns[TYPE_COL]; |
114 | 24 | if (so.isA(type, SequenceOntologyI.PROTEIN_MATCH) |
115 | || (".".equals(columns[SOURCE_COL]) | |
116 | && so.isA(type, SequenceOntologyI.POLYPEPTIDE))) | |
117 | { | |
118 | 1 | return true; |
119 | } | |
120 | 23 | return false; |
121 | } | |
122 | ||
123 | /** | |
124 | * Overriden method, because InterProScan GFF has the target sequence id in | |
125 | * GFF field 'ID' rather than the usual 'Target' :-O | |
126 | */ | |
127 | 1 | @Override |
128 | protected String findTargetId(String target, | |
129 | Map<String, List<String>> set) | |
130 | { | |
131 | 1 | List<String> ids = set.get(ID); |
132 | 1 | if (ids == null || ids.size() != 1) |
133 | { | |
134 | 0 | return null; |
135 | } | |
136 | 1 | return ids.get(0); |
137 | } | |
138 | ||
139 | } |