1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.io.gff; |
22 |
|
|
23 |
|
import jalview.datamodel.AlignedCodonFrame; |
24 |
|
import jalview.datamodel.AlignmentI; |
25 |
|
import jalview.datamodel.MappingType; |
26 |
|
import jalview.datamodel.SequenceFeature; |
27 |
|
import jalview.datamodel.SequenceI; |
28 |
|
import jalview.util.MapList; |
29 |
|
|
30 |
|
import java.io.IOException; |
31 |
|
import java.util.List; |
32 |
|
import java.util.Map; |
33 |
|
|
34 |
|
|
35 |
|
|
36 |
|
|
|
|
| 84.9% |
Uncovered Elements: 19 (126) |
Complexity: 37 |
Complexity Density: 0.42 |
|
37 |
|
public class ExonerateHelper extends Gff2Helper |
38 |
|
{ |
39 |
|
private static final String SIMILARITY = "similarity"; |
40 |
|
|
41 |
|
private static final String GENOME2GENOME = "genome2genome"; |
42 |
|
|
43 |
|
private static final String CDNA2GENOME = "cdna2genome"; |
44 |
|
|
45 |
|
private static final String CODING2GENOME = "coding2genome"; |
46 |
|
|
47 |
|
private static final String CODING2CODING = "coding2coding"; |
48 |
|
|
49 |
|
private static final String PROTEIN2GENOME = "protein2genome"; |
50 |
|
|
51 |
|
private static final String PROTEIN2DNA = "protein2dna"; |
52 |
|
|
53 |
|
private static final String ALIGN = "Align"; |
54 |
|
|
55 |
|
private static final String QUERY = "Query"; |
56 |
|
|
57 |
|
private static final String TARGET = "Target"; |
58 |
|
|
59 |
|
|
60 |
|
|
61 |
|
|
62 |
|
@param |
63 |
|
|
64 |
|
@param |
65 |
|
|
66 |
|
|
67 |
|
@param |
68 |
|
|
69 |
|
@param |
70 |
|
|
71 |
|
@param |
72 |
|
|
73 |
|
@return |
74 |
|
|
75 |
|
|
76 |
|
|
|
|
| 83.3% |
Uncovered Elements: 1 (6) |
Complexity: 2 |
Complexity Density: 0.33 |
|
77 |
7 |
@Override... |
78 |
|
public SequenceFeature processGff(SequenceI seq, String[] gffColumns, |
79 |
|
AlignmentI align, List<SequenceI> newseqs, |
80 |
|
boolean relaxedIdMatching) |
81 |
|
{ |
82 |
7 |
String attr = gffColumns[ATTRIBUTES_COL]; |
83 |
7 |
Map<String, List<String>> set = parseNameValuePairs(attr); |
84 |
|
|
85 |
7 |
try |
86 |
|
{ |
87 |
7 |
processGffSimilarity(set, seq, gffColumns, align, newseqs, |
88 |
|
relaxedIdMatching); |
89 |
|
} catch (IOException ivfe) |
90 |
|
{ |
91 |
0 |
System.err.println(ivfe); |
92 |
|
} |
93 |
|
|
94 |
|
|
95 |
|
|
96 |
|
|
97 |
|
|
98 |
7 |
return null; |
99 |
|
} |
100 |
|
|
101 |
|
|
102 |
|
|
103 |
|
|
104 |
|
|
105 |
|
|
106 |
|
@param |
107 |
|
|
108 |
|
@param |
109 |
|
|
110 |
|
@param |
111 |
|
|
112 |
|
@param |
113 |
|
|
114 |
|
|
115 |
|
@param |
116 |
|
|
117 |
|
@param |
118 |
|
|
119 |
|
@throws |
120 |
|
|
|
|
| 80% |
Uncovered Elements: 9 (45) |
Complexity: 12 |
Complexity Density: 0.39 |
|
121 |
11 |
protected void processGffSimilarity(Map<String, List<String>> set,... |
122 |
|
SequenceI seq, String[] gff, AlignmentI align, |
123 |
|
List<SequenceI> newseqs, boolean relaxedIdMatching) |
124 |
|
throws IOException |
125 |
|
{ |
126 |
|
|
127 |
|
|
128 |
|
|
129 |
|
|
130 |
|
|
131 |
|
|
132 |
|
|
133 |
|
|
134 |
|
|
135 |
11 |
boolean featureIsOnTarget = true; |
136 |
11 |
List<String> mapTo = set.get(QUERY); |
137 |
11 |
if (mapTo == null) |
138 |
|
{ |
139 |
3 |
mapTo = set.get(TARGET); |
140 |
3 |
featureIsOnTarget = false; |
141 |
|
} |
142 |
11 |
MappingType type = getMappingType(gff[SOURCE_COL]); |
143 |
|
|
144 |
11 |
if (type == null) |
145 |
|
{ |
146 |
0 |
throw new IOException("Sorry, I don't handle " + gff[SOURCE_COL]); |
147 |
|
} |
148 |
|
|
149 |
11 |
if (mapTo == null || mapTo.size() != 1) |
150 |
|
{ |
151 |
0 |
throw new IOException( |
152 |
|
"Expecting exactly one sequence in Query or Target field (got " |
153 |
|
+ mapTo + ")"); |
154 |
|
} |
155 |
|
|
156 |
|
|
157 |
|
|
158 |
|
|
159 |
11 |
SequenceI mappedSequence = findSequence(mapTo.get(0), align, newseqs, |
160 |
|
relaxedIdMatching); |
161 |
|
|
162 |
|
|
163 |
|
|
164 |
|
|
165 |
11 |
SequenceI mapFromSequence = seq; |
166 |
11 |
SequenceI mapToSequence = mappedSequence; |
167 |
11 |
if ((type == MappingType.NucleotideToPeptide && featureIsOnTarget) |
168 |
|
|| (type == MappingType.PeptideToNucleotide |
169 |
|
&& !featureIsOnTarget)) |
170 |
|
{ |
171 |
3 |
mapFromSequence = mappedSequence; |
172 |
3 |
mapToSequence = seq; |
173 |
|
} |
174 |
|
|
175 |
|
|
176 |
|
|
177 |
|
|
178 |
|
|
179 |
|
|
180 |
|
|
181 |
|
|
182 |
|
|
183 |
|
|
184 |
|
|
185 |
|
|
186 |
11 |
AlignedCodonFrame acf = getMapping(align, mapFromSequence, |
187 |
|
mapToSequence); |
188 |
|
|
189 |
|
|
190 |
|
|
191 |
|
|
192 |
|
|
193 |
11 |
String strand = gff[STRAND_COL]; |
194 |
11 |
boolean forwardStrand = true; |
195 |
11 |
if ("-".equals(strand)) |
196 |
|
{ |
197 |
9 |
forwardStrand = false; |
198 |
|
} |
199 |
2 |
else if (!"+".equals(strand)) |
200 |
|
{ |
201 |
0 |
System.err.println("Strand must be specified for alignment"); |
202 |
0 |
return; |
203 |
|
} |
204 |
|
|
205 |
11 |
List<String> alignedRegions = set.get(ALIGN); |
206 |
11 |
for (String region : alignedRegions) |
207 |
|
{ |
208 |
15 |
MapList mapping = buildMapping(region, type, forwardStrand, |
209 |
|
featureIsOnTarget, gff); |
210 |
|
|
211 |
15 |
if (mapping == null) |
212 |
|
{ |
213 |
0 |
continue; |
214 |
|
} |
215 |
|
|
216 |
15 |
acf.addMap(mapFromSequence, mapToSequence, mapping); |
217 |
|
} |
218 |
11 |
align.addCodonFrame(acf); |
219 |
|
} |
220 |
|
|
221 |
|
|
222 |
|
|
223 |
|
|
224 |
|
@param |
225 |
|
@param |
226 |
|
@param |
227 |
|
@param |
228 |
|
@param |
229 |
|
@return |
230 |
|
|
|
|
| 88.1% |
Uncovered Elements: 5 (42) |
Complexity: 6 |
Complexity Density: 0.18 |
|
231 |
15 |
protected MapList buildMapping(String region, MappingType type,... |
232 |
|
boolean forwardStrand, boolean featureIsOnTarget, String[] gff) |
233 |
|
{ |
234 |
|
|
235 |
|
|
236 |
|
|
237 |
15 |
String[] tokens = region.split(" "); |
238 |
15 |
if (tokens.length != 3) |
239 |
|
{ |
240 |
0 |
System.err.println("Malformed Align descriptor: " + region); |
241 |
0 |
return null; |
242 |
|
} |
243 |
|
|
244 |
|
|
245 |
|
|
246 |
|
|
247 |
|
|
248 |
15 |
int alignFromStart; |
249 |
15 |
int alignToStart; |
250 |
15 |
int alignCount; |
251 |
15 |
try |
252 |
|
{ |
253 |
15 |
alignFromStart = Integer.parseInt(tokens[0]); |
254 |
15 |
alignToStart = Integer.parseInt(tokens[1]); |
255 |
15 |
alignCount = Integer.parseInt(tokens[2]); |
256 |
|
} catch (NumberFormatException nfe) |
257 |
|
{ |
258 |
0 |
System.err.println(nfe.toString()); |
259 |
0 |
return null; |
260 |
|
} |
261 |
|
|
262 |
15 |
int fromStart; |
263 |
15 |
int fromEnd; |
264 |
15 |
int toStart; |
265 |
15 |
int toEnd; |
266 |
|
|
267 |
15 |
if (featureIsOnTarget) |
268 |
|
{ |
269 |
12 |
fromStart = alignToStart; |
270 |
12 |
toStart = alignFromStart; |
271 |
12 |
toEnd = forwardStrand ? toStart + alignCount - 1 |
272 |
|
: toStart - (alignCount - 1); |
273 |
12 |
int toLength = Math.abs(toEnd - toStart) + 1; |
274 |
12 |
int fromLength = toLength * type.getFromRatio() / type.getToRatio(); |
275 |
12 |
fromEnd = fromStart + fromLength - 1; |
276 |
|
} |
277 |
|
else |
278 |
|
{ |
279 |
|
|
280 |
|
|
281 |
3 |
fromStart = alignFromStart; |
282 |
3 |
fromEnd = alignFromStart + alignCount - 1; |
283 |
3 |
int fromLength = fromEnd - fromStart + 1; |
284 |
3 |
int toLength = fromLength * type.getToRatio() / type.getFromRatio(); |
285 |
3 |
toStart = alignToStart; |
286 |
3 |
if (forwardStrand) |
287 |
|
{ |
288 |
1 |
toEnd = toStart + toLength - 1; |
289 |
|
} |
290 |
|
else |
291 |
|
{ |
292 |
2 |
toEnd = toStart - (toLength - 1); |
293 |
|
} |
294 |
|
} |
295 |
|
|
296 |
15 |
MapList codonmapping = constructMappingFromAlign(fromStart, fromEnd, |
297 |
|
toStart, toEnd, type); |
298 |
15 |
return codonmapping; |
299 |
|
} |
300 |
|
|
301 |
|
|
302 |
|
|
303 |
|
|
304 |
|
@param |
305 |
|
@return |
306 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (10) |
Complexity: 7 |
Complexity Density: 1.17 |
|
307 |
18 |
protected static MappingType getMappingType(String model)... |
308 |
|
{ |
309 |
18 |
MappingType result = null; |
310 |
|
|
311 |
18 |
if (model.contains(PROTEIN2DNA) || model.contains(PROTEIN2GENOME)) |
312 |
|
{ |
313 |
13 |
result = MappingType.PeptideToNucleotide; |
314 |
|
} |
315 |
5 |
else if (model.contains(CODING2CODING) || model.contains(CODING2GENOME) |
316 |
|
|| model.contains(CDNA2GENOME) || model.contains(GENOME2GENOME)) |
317 |
|
{ |
318 |
4 |
result = MappingType.NucleotideToNucleotide; |
319 |
|
} |
320 |
18 |
return result; |
321 |
|
} |
322 |
|
|
323 |
|
|
324 |
|
|
325 |
|
|
326 |
|
|
327 |
|
@param |
328 |
|
@return |
329 |
|
|
|
|
| 93.3% |
Uncovered Elements: 1 (15) |
Complexity: 9 |
Complexity Density: 1 |
|
330 |
38 |
public static boolean recognises(String[] columns)... |
331 |
|
{ |
332 |
38 |
if (!SIMILARITY.equalsIgnoreCase(columns[TYPE_COL])) |
333 |
|
{ |
334 |
22 |
return false; |
335 |
|
} |
336 |
|
|
337 |
|
|
338 |
|
|
339 |
|
|
340 |
16 |
String model = columns[SOURCE_COL]; |
341 |
|
|
342 |
16 |
if (model != null) |
343 |
|
{ |
344 |
16 |
String mdl = model.toLowerCase(); |
345 |
16 |
if (mdl.contains(PROTEIN2DNA) || mdl.contains(PROTEIN2GENOME) |
346 |
|
|| mdl.contains(CODING2CODING) || mdl.contains(CODING2GENOME) |
347 |
|
|| mdl.contains(CDNA2GENOME) || mdl.contains(GENOME2GENOME)) |
348 |
|
{ |
349 |
14 |
return true; |
350 |
|
} |
351 |
|
} |
352 |
2 |
System.err.println("Sorry, I don't handle exonerate model " + model); |
353 |
2 |
return false; |
354 |
|
} |
355 |
|
|
356 |
|
|
357 |
|
|
358 |
|
|
359 |
|
|
|
|
| 0% |
Uncovered Elements: 2 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
360 |
0 |
@Override... |
361 |
|
protected SequenceFeature buildSequenceFeature(String[] gff, |
362 |
|
Map<String, List<String>> set) |
363 |
|
{ |
364 |
0 |
SequenceFeature sf = super.buildSequenceFeature(gff, TYPE_COL, |
365 |
|
"exonerate", set); |
366 |
|
|
367 |
0 |
return sf; |
368 |
|
} |
369 |
|
|
370 |
|
} |