1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.io.gff; |
22 |
|
|
23 |
|
import jalview.datamodel.AlignedCodonFrame; |
24 |
|
import jalview.datamodel.AlignmentI; |
25 |
|
import jalview.datamodel.MappingType; |
26 |
|
import jalview.datamodel.SequenceFeature; |
27 |
|
import jalview.datamodel.SequenceI; |
28 |
|
import jalview.util.MapList; |
29 |
|
import jalview.util.StringUtils; |
30 |
|
|
31 |
|
import java.io.IOException; |
32 |
|
import java.util.List; |
33 |
|
import java.util.Map; |
34 |
|
|
35 |
|
|
36 |
|
|
37 |
|
|
38 |
|
|
39 |
|
|
|
|
| 86.5% |
Uncovered Elements: 18 (133) |
Complexity: 28 |
Complexity Density: 0.3 |
|
40 |
|
public class Gff3Helper extends GffHelperBase |
41 |
|
{ |
42 |
|
/**
 * Name of the attribute whose values getDescription() joins with commas to
 * describe a sequence_variant feature
 */
public static final String ALLELES = "alleles"; |
43 |
|
|
44 |
|
/**
 * Name of the attribute (and feature value key) holding the match target;
 * this class reads it as space-separated text whose first token is the
 * target id
 */
protected static final String TARGET = "Target"; |
45 |
|
|
46 |
|
/**
 * Feature value key used by getDescription() as the fallback description
 * when nothing else has supplied one
 */
protected static final String ID = "ID"; |
47 |
|
|
48 |
|
/**
 * Name of the attribute whose values processProteinMatch() joins with commas
 * to form the accession id of a matched sequence
 */
private static final String NAME = "Name"; |
49 |
|
|
50 |
|
|
51 |
|
|
52 |
|
|
53 |
|
|
54 |
|
@param |
55 |
|
@return |
56 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
57 |
8 |
public static Map<String, List<String>> parseNameValuePairs(String text)... |
58 |
|
{ |
59 |
8 |
return parseNameValuePairs(text, ";", '=', ","); |
60 |
|
} |
61 |
|
|
62 |
|
|
63 |
|
|
64 |
|
|
65 |
|
@param |
66 |
|
|
67 |
|
@param |
68 |
|
|
69 |
|
|
70 |
|
@param |
71 |
|
|
72 |
|
@param |
73 |
|
|
74 |
|
@param |
75 |
|
|
76 |
|
@return |
77 |
|
|
78 |
|
|
79 |
|
@throws |
80 |
|
|
|
|
| 78.9% |
Uncovered Elements: 4 (19) |
Complexity: 4 |
Complexity Density: 0.31 |
|
81 |
7 |
@Override... |
82 |
|
public SequenceFeature processGff(SequenceI seq, String[] gff, |
83 |
|
AlignmentI align, List<SequenceI> newseqs, |
84 |
|
boolean relaxedIdMatching) throws IOException |
85 |
|
{ |
86 |
7 |
SequenceFeature sf = null; |
87 |
|
|
88 |
7 |
if (gff.length == 9) |
89 |
|
{ |
90 |
7 |
String soTerm = gff[TYPE_COL]; |
91 |
7 |
String atts = gff[ATTRIBUTES_COL]; |
92 |
7 |
Map<String, List<String>> attributes = parseNameValuePairs(atts); |
93 |
|
|
94 |
7 |
SequenceOntologyI so = SequenceOntologyFactory.getInstance(); |
95 |
7 |
if (so.isA(soTerm, SequenceOntologyI.PROTEIN_MATCH)) |
96 |
|
{ |
97 |
0 |
sf = processProteinMatch(attributes, seq, gff, align, newseqs, |
98 |
|
relaxedIdMatching); |
99 |
|
} |
100 |
7 |
else if (so.isA(soTerm, SequenceOntologyI.NUCLEOTIDE_MATCH)) |
101 |
|
{ |
102 |
5 |
sf = processNucleotideMatch(attributes, seq, gff, align, newseqs, |
103 |
|
relaxedIdMatching); |
104 |
|
} |
105 |
|
else |
106 |
|
{ |
107 |
2 |
sf = buildSequenceFeature(gff, attributes); |
108 |
|
} |
109 |
|
} |
110 |
|
else |
111 |
|
{ |
112 |
|
|
113 |
|
|
114 |
|
|
115 |
0 |
sf = buildSequenceFeature(gff, null); |
116 |
|
} |
117 |
|
|
118 |
7 |
return sf; |
119 |
|
} |
120 |
|
|
121 |
|
|
122 |
|
|
123 |
|
|
124 |
|
@param |
125 |
|
|
126 |
|
@param |
127 |
|
|
128 |
|
@param |
129 |
|
|
130 |
|
@param |
131 |
|
|
132 |
|
|
133 |
|
@param |
134 |
|
|
135 |
|
@param |
136 |
|
|
137 |
|
@return |
138 |
|
|
139 |
|
@throws |
140 |
|
|
|
|
| 78.7% |
Uncovered Elements: 10 (47) |
Complexity: 9 |
Complexity Density: 0.26 |
|
141 |
5 |
protected SequenceFeature processNucleotideMatch(... |
142 |
|
Map<String, List<String>> attributes, SequenceI seq, |
143 |
|
String[] gffColumns, AlignmentI align, List<SequenceI> newseqs, |
144 |
|
boolean relaxedIdMatching) throws IOException |
145 |
|
{ |
146 |
5 |
String strand = gffColumns[STRAND_COL]; |
147 |
|
|
148 |
|
|
149 |
|
|
150 |
|
|
151 |
|
|
152 |
|
|
153 |
|
|
154 |
|
|
155 |
5 |
if ("-".equals(strand)) |
156 |
|
{ |
157 |
1 |
jalview.bin.Console.errPrintln( |
158 |
|
"Skipping mapping from reverse complement as not yet supported"); |
159 |
1 |
return null; |
160 |
|
} |
161 |
|
|
162 |
4 |
List<String> targets = attributes.get(TARGET); |
163 |
4 |
if (targets == null) |
164 |
|
{ |
165 |
0 |
jalview.bin.Console.errPrintln("'Target' missing in GFF"); |
166 |
0 |
return null; |
167 |
|
} |
168 |
|
|
169 |
|
|
170 |
|
|
171 |
|
|
172 |
|
|
173 |
4 |
for (String target : targets) |
174 |
|
{ |
175 |
|
|
176 |
|
|
177 |
|
|
178 |
4 |
String[] tokens = target.split(" "); |
179 |
4 |
if (tokens.length < 3) |
180 |
|
{ |
181 |
0 |
jalview.bin.Console.errPrintln("Incomplete Target: " + target); |
182 |
0 |
continue; |
183 |
|
} |
184 |
|
|
185 |
|
|
186 |
|
|
187 |
|
|
188 |
|
|
189 |
4 |
String targetId = findTargetId(tokens[0], attributes); |
190 |
4 |
SequenceI mappedSequence1 = findSequence(targetId, align, newseqs, |
191 |
|
relaxedIdMatching); |
192 |
4 |
SequenceI mappedSequence = mappedSequence1; |
193 |
4 |
if (mappedSequence == null) |
194 |
|
{ |
195 |
0 |
continue; |
196 |
|
} |
197 |
|
|
198 |
|
|
199 |
|
|
200 |
|
|
201 |
|
|
202 |
4 |
AlignedCodonFrame acf = getMapping(align, seq, mappedSequence); |
203 |
|
|
204 |
4 |
try |
205 |
|
{ |
206 |
4 |
int toStart = Integer.parseInt(tokens[1]); |
207 |
4 |
int toEnd = Integer.parseInt(tokens[2]); |
208 |
4 |
if (tokens.length > 3 && "-".equals(tokens[3])) |
209 |
|
{ |
210 |
|
|
211 |
1 |
int temp = toStart; |
212 |
1 |
toStart = toEnd; |
213 |
1 |
toEnd = temp; |
214 |
|
} |
215 |
|
|
216 |
4 |
int fromStart = Integer.parseInt(gffColumns[START_COL]); |
217 |
4 |
int fromEnd = Integer.parseInt(gffColumns[END_COL]); |
218 |
4 |
MapList mapping = constructMappingFromAlign(fromStart, fromEnd, |
219 |
|
toStart, toEnd, MappingType.NucleotideToNucleotide); |
220 |
|
|
221 |
4 |
if (mapping != null) |
222 |
|
{ |
223 |
4 |
acf.addMap(seq, mappedSequence, mapping); |
224 |
4 |
align.addCodonFrame(acf); |
225 |
|
} |
226 |
|
} catch (NumberFormatException nfe) |
227 |
|
{ |
228 |
0 |
jalview.bin.Console |
229 |
|
.errPrintln("Invalid start or end in Target " + target); |
230 |
|
} |
231 |
|
} |
232 |
|
|
233 |
4 |
SequenceFeature sf = buildSequenceFeature(gffColumns, attributes); |
234 |
4 |
return sf; |
235 |
|
} |
236 |
|
|
237 |
|
|
238 |
|
|
239 |
|
|
240 |
|
|
241 |
|
|
242 |
|
@param |
243 |
|
|
244 |
|
|
245 |
|
@param |
246 |
|
|
247 |
|
@return |
248 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
249 |
4 |
@SuppressWarnings("unused")... |
250 |
|
protected String findTargetId(String target, |
251 |
|
Map<String, List<String>> set) |
252 |
|
{ |
253 |
4 |
return target; |
254 |
|
} |
255 |
|
|
256 |
|
|
257 |
|
|
258 |
|
|
259 |
|
|
260 |
|
|
261 |
|
|
262 |
|
|
263 |
|
|
264 |
|
|
265 |
|
@param |
266 |
|
|
267 |
|
@param |
268 |
|
|
269 |
|
@param |
270 |
|
|
271 |
|
@param |
272 |
|
|
273 |
|
|
274 |
|
@param |
275 |
|
|
276 |
|
@param |
277 |
|
|
278 |
|
@return |
279 |
|
@throws |
280 |
|
|
|
|
| 86.2% |
Uncovered Elements: 4 (29) |
Complexity: 4 |
Complexity Density: 0.17 |
|
281 |
1 |
protected SequenceFeature processProteinMatch(... |
282 |
|
Map<String, List<String>> set, SequenceI seq, String[] gffColumns, |
283 |
|
AlignmentI align, List<SequenceI> newseqs, |
284 |
|
boolean relaxedIdMatching) |
285 |
|
{ |
286 |
|
|
287 |
|
|
288 |
|
|
289 |
|
|
290 |
|
|
291 |
|
|
292 |
|
|
293 |
1 |
SequenceFeature sf = buildSequenceFeature(gffColumns, set); |
294 |
|
|
295 |
|
|
296 |
|
|
297 |
|
|
298 |
|
|
299 |
1 |
List<String> targets = set.get(TARGET); |
300 |
1 |
if (targets != null) |
301 |
|
{ |
302 |
1 |
for (String target : targets) |
303 |
|
{ |
304 |
|
|
305 |
1 |
SequenceI mappedSequence1 = findSequence(findTargetId(target, set), |
306 |
|
align, newseqs, relaxedIdMatching); |
307 |
1 |
SequenceI mappedSequence = mappedSequence1; |
308 |
1 |
if (mappedSequence == null) |
309 |
|
{ |
310 |
0 |
continue; |
311 |
|
} |
312 |
|
|
313 |
|
|
314 |
|
|
315 |
|
|
316 |
|
|
317 |
1 |
int sequenceFeatureLength = 1 + sf.getEnd() - sf.getBegin(); |
318 |
1 |
SequenceFeature sf2 = new SequenceFeature(sf, 1, |
319 |
|
sequenceFeatureLength, sf.getFeatureGroup(), sf.getScore()); |
320 |
1 |
mappedSequence.addSequenceFeature(sf2); |
321 |
|
|
322 |
|
|
323 |
|
|
324 |
|
|
325 |
|
|
326 |
|
|
327 |
1 |
String accessionId = StringUtils |
328 |
|
.listToDelimitedString(set.get(NAME), ","); |
329 |
1 |
if (accessionId.length() > 0) |
330 |
|
{ |
331 |
1 |
String database = sf.getType(); |
332 |
1 |
String qualifiedAccId = database + "|" + accessionId; |
333 |
1 |
sf2.setValue(RENAME_TOKEN, qualifiedAccId); |
334 |
|
} |
335 |
|
|
336 |
|
|
337 |
|
|
338 |
|
|
339 |
|
|
340 |
1 |
AlignedCodonFrame alco = getMapping(align, seq, mappedSequence); |
341 |
1 |
int[] from = new int[] { sf.getBegin(), sf.getEnd() }; |
342 |
1 |
int[] to = new int[] { 1, sequenceFeatureLength }; |
343 |
1 |
MapList mapping = new MapList(from, to, 1, 1); |
344 |
|
|
345 |
1 |
alco.addMap(seq, mappedSequence, mapping); |
346 |
1 |
align.addCodonFrame(alco); |
347 |
|
} |
348 |
|
} |
349 |
|
|
350 |
1 |
return sf; |
351 |
|
} |
352 |
|
|
353 |
|
|
354 |
|
|
355 |
|
|
356 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (7) |
Complexity: 2 |
Complexity Density: 0.4 |
|
357 |
7 |
@Override... |
358 |
|
protected SequenceFeature buildSequenceFeature(String[] gff, |
359 |
|
int typeColumn, String group, |
360 |
|
Map<String, List<String>> attributes) |
361 |
|
{ |
362 |
7 |
SequenceFeature sf = super.buildSequenceFeature(gff, typeColumn, group, |
363 |
|
attributes); |
364 |
7 |
String desc = getDescription(sf, attributes); |
365 |
7 |
if (desc != null) |
366 |
|
{ |
367 |
6 |
sf.setDescription(desc); |
368 |
|
} |
369 |
7 |
return sf; |
370 |
|
} |
371 |
|
|
372 |
|
|
373 |
|
|
374 |
|
|
375 |
|
@param |
376 |
|
@param |
377 |
|
@return |
378 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (22) |
Complexity: 7 |
Complexity Density: 0.5 |
|
379 |
15 |
protected String getDescription(SequenceFeature sf,... |
380 |
|
Map<String, List<String>> attributes) |
381 |
|
{ |
382 |
15 |
String desc = null; |
383 |
15 |
String target = (String) sf.getValue(TARGET); |
384 |
15 |
if (target != null) |
385 |
|
{ |
386 |
6 |
desc = target.split(" ")[0]; |
387 |
|
} |
388 |
|
|
389 |
15 |
SequenceOntologyI so = SequenceOntologyFactory.getInstance(); |
390 |
15 |
String type = sf.getType(); |
391 |
15 |
if (so.isA(type, SequenceOntologyI.SEQUENCE_VARIANT)) |
392 |
|
{ |
393 |
|
|
394 |
|
|
395 |
|
|
396 |
2 |
desc = StringUtils.listToDelimitedString(attributes.get(ALLELES), |
397 |
|
","); |
398 |
|
} |
399 |
|
|
400 |
|
|
401 |
|
|
402 |
|
|
403 |
|
|
404 |
15 |
if (SequenceOntologyI.NMD_TRANSCRIPT_VARIANT.equals(type) |
405 |
|
|| so.isA(type, SequenceOntologyI.TRANSCRIPT) |
406 |
|
|| so.isA(type, SequenceOntologyI.EXON)) |
407 |
|
{ |
408 |
4 |
desc = StringUtils.listToDelimitedString(attributes.get("Name"), ","); |
409 |
|
} |
410 |
|
|
411 |
|
|
412 |
|
|
413 |
|
|
414 |
15 |
if (desc == null) |
415 |
|
{ |
416 |
4 |
desc = (String) sf.getValue(ID); |
417 |
|
} |
418 |
|
|
419 |
|
|
420 |
|
|
421 |
|
|
422 |
15 |
desc = StringUtils.urlDecode(desc, GFF_ENCODABLE); |
423 |
|
|
424 |
15 |
return desc; |
425 |
|
} |
426 |
|
} |