1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.ws.dbsources; |
22 |
|
|
23 |
|
import java.io.File; |
24 |
|
import java.io.FileInputStream; |
25 |
|
import java.io.InputStream; |
26 |
|
import java.text.ParseException; |
27 |
|
import java.util.ArrayList; |
28 |
|
import java.util.Arrays; |
29 |
|
import java.util.Hashtable; |
30 |
|
import java.util.List; |
31 |
|
import java.util.Locale; |
32 |
|
import java.util.Map; |
33 |
|
import java.util.Map.Entry; |
34 |
|
|
35 |
|
import javax.xml.bind.JAXBContext; |
36 |
|
import javax.xml.bind.JAXBElement; |
37 |
|
import javax.xml.bind.JAXBException; |
38 |
|
import javax.xml.stream.FactoryConfigurationError; |
39 |
|
import javax.xml.stream.XMLInputFactory; |
40 |
|
import javax.xml.stream.XMLStreamException; |
41 |
|
import javax.xml.stream.XMLStreamReader; |
42 |
|
|
43 |
|
import com.stevesoft.pat.Regex; |
44 |
|
|
45 |
|
import jalview.analysis.SequenceIdMatcher; |
46 |
|
import jalview.bin.Console; |
47 |
|
import jalview.datamodel.Alignment; |
48 |
|
import jalview.datamodel.AlignmentI; |
49 |
|
import jalview.datamodel.DBRefEntry; |
50 |
|
import jalview.datamodel.DBRefSource; |
51 |
|
import jalview.datamodel.FeatureProperties; |
52 |
|
import jalview.datamodel.Mapping; |
53 |
|
import jalview.datamodel.Sequence; |
54 |
|
import jalview.datamodel.SequenceFeature; |
55 |
|
import jalview.datamodel.SequenceI; |
56 |
|
import jalview.util.DBRefUtils; |
57 |
|
import jalview.util.DnaUtils; |
58 |
|
import jalview.util.MapList; |
59 |
|
import jalview.util.MappingUtils; |
60 |
|
import jalview.ws.ebi.EBIFetchClient; |
61 |
|
import jalview.xml.binding.embl.EntryType; |
62 |
|
import jalview.xml.binding.embl.EntryType.Feature; |
63 |
|
import jalview.xml.binding.embl.EntryType.Feature.Qualifier; |
64 |
|
import jalview.xml.binding.embl.ROOT; |
65 |
|
import jalview.xml.binding.embl.XrefType; |
66 |
|
|
|
|
| 68.1% |
Uncovered Elements: 120 (376) |
Complexity: 85 |
Complexity Density: 0.33 |
|
67 |
|
public abstract class EmblXmlSource extends EbiFileRetrievedProxy |
68 |
|
{ |
69 |
|
private static final Regex ACCESSION_REGEX = new Regex("^[A-Z]+[0-9]+"); |
70 |
|
|
71 |
|
|
72 |
|
|
73 |
|
|
74 |
|
private static final String EMBL_NOT_FOUND_REPLY = "ERROR 12 No entries found."; |
75 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
76 |
1 |
public EmblXmlSource()... |
77 |
|
{ |
78 |
1 |
super(); |
79 |
|
} |
80 |
|
|
81 |
|
|
82 |
|
|
83 |
|
|
84 |
|
|
85 |
|
@param |
86 |
|
|
87 |
|
@param |
88 |
|
@return |
89 |
|
@throws |
90 |
|
|
|
|
| 0% |
Uncovered Elements: 8 (8) |
Complexity: 2 |
Complexity Density: 0.25 |
|
91 |
0 |
protected AlignmentI getEmblSequenceRecords(String emprefx, String query)... |
92 |
|
throws Exception |
93 |
|
{ |
94 |
0 |
startQuery(); |
95 |
0 |
EBIFetchClient dbFetch = new EBIFetchClient(); |
96 |
0 |
File reply; |
97 |
0 |
try |
98 |
|
{ |
99 |
0 |
reply = dbFetch.fetchDataAsFile( |
100 |
|
emprefx.toLowerCase(Locale.ROOT) + ":" + query.trim(), |
101 |
|
"display=xml", "xml"); |
102 |
|
} catch (Exception e) |
103 |
|
{ |
104 |
0 |
stopQuery(); |
105 |
0 |
throw new Exception( |
106 |
|
String.format("EBI EMBL XML retrieval failed for %s:%s", |
107 |
|
emprefx.toLowerCase(Locale.ROOT), query.trim()), |
108 |
|
e); |
109 |
|
} |
110 |
0 |
return getEmblSequenceRecords(emprefx, query, reply); |
111 |
|
} |
112 |
|
|
113 |
|
|
114 |
|
|
115 |
|
|
116 |
|
@param |
117 |
|
|
118 |
|
|
119 |
|
@param |
120 |
|
@param |
121 |
|
|
122 |
|
@return |
123 |
|
@throws |
124 |
|
|
|
|
| 0% |
Uncovered Elements: 29 (29) |
Complexity: 7 |
Complexity Density: 0.37 |
|
125 |
0 |
protected AlignmentI getEmblSequenceRecords(String emprefx, String query,... |
126 |
|
File reply) throws Exception |
127 |
|
{ |
128 |
0 |
List<EntryType> entries = null; |
129 |
0 |
if (reply != null && reply.exists()) |
130 |
|
{ |
131 |
0 |
file = reply.getAbsolutePath(); |
132 |
0 |
if (reply.length() > EMBL_NOT_FOUND_REPLY.length()) |
133 |
|
{ |
134 |
0 |
InputStream is = new FileInputStream(reply); |
135 |
0 |
entries = getEmblEntries(is); |
136 |
|
} |
137 |
|
} |
138 |
|
|
139 |
|
|
140 |
|
|
141 |
|
|
142 |
|
|
143 |
|
|
144 |
0 |
AlignmentI al = null; |
145 |
0 |
List<SequenceI> seqs = new ArrayList<>(); |
146 |
0 |
List<SequenceI> peptides = new ArrayList<>(); |
147 |
0 |
if (entries != null) |
148 |
|
{ |
149 |
0 |
for (EntryType entry : entries) |
150 |
|
{ |
151 |
0 |
SequenceI seq = getSequence(emprefx, entry, peptides); |
152 |
0 |
if (seq != null) |
153 |
|
{ |
154 |
0 |
seqs.add(seq.deriveSequence()); |
155 |
|
|
156 |
|
} |
157 |
|
} |
158 |
0 |
if (!seqs.isEmpty()) |
159 |
|
{ |
160 |
0 |
al = new Alignment(seqs.toArray(new SequenceI[seqs.size()])); |
161 |
|
} |
162 |
|
else |
163 |
|
{ |
164 |
0 |
jalview.bin.Console.outPrintln( |
165 |
|
"No record found for '" + emprefx + ":" + query + "'"); |
166 |
|
} |
167 |
|
} |
168 |
|
|
169 |
0 |
stopQuery(); |
170 |
0 |
return al; |
171 |
|
} |
172 |
|
|
173 |
|
|
174 |
|
|
175 |
|
|
176 |
|
|
177 |
|
|
178 |
|
|
179 |
|
@return |
180 |
|
|
|
|
| 71.4% |
Uncovered Elements: 6 (21) |
Complexity: 5 |
Complexity Density: 0.33 |
|
181 |
2 |
List<EntryType> getEmblEntries(InputStream is)... |
182 |
|
{ |
183 |
2 |
List<EntryType> entries = new ArrayList<>(); |
184 |
2 |
try |
185 |
|
{ |
186 |
2 |
JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.embl"); |
187 |
2 |
XMLStreamReader streamReader = XMLInputFactory.newInstance() |
188 |
|
.createXMLStreamReader(is); |
189 |
2 |
javax.xml.bind.Unmarshaller um = jc.createUnmarshaller(); |
190 |
2 |
JAXBElement<ROOT> rootElement = um.unmarshal(streamReader, |
191 |
|
ROOT.class); |
192 |
2 |
ROOT root = rootElement.getValue(); |
193 |
|
|
194 |
|
|
195 |
|
|
196 |
|
|
197 |
2 |
if (root == null) |
198 |
|
{ |
199 |
0 |
return entries; |
200 |
|
} |
201 |
2 |
if (root.getEntrySet() != null) |
202 |
|
{ |
203 |
0 |
entries = root.getEntrySet().getEntry(); |
204 |
|
} |
205 |
2 |
else if (root.getEntry() != null) |
206 |
|
{ |
207 |
2 |
entries.add(root.getEntry()); |
208 |
|
} |
209 |
|
} catch (JAXBException | XMLStreamException |
210 |
|
| FactoryConfigurationError e) |
211 |
|
{ |
212 |
0 |
e.printStackTrace(); |
213 |
|
} |
214 |
2 |
return entries; |
215 |
|
} |
216 |
|
|
217 |
|
|
218 |
|
|
219 |
|
|
220 |
|
|
221 |
|
@param |
222 |
|
@param |
223 |
|
@param |
224 |
|
@return |
225 |
|
|
|
|
| 76.7% |
Uncovered Elements: 10 (43) |
Complexity: 8 |
Complexity Density: 0.24 |
|
226 |
1 |
SequenceI getSequence(String sourceDb, EntryType entry,... |
227 |
|
List<SequenceI> peptides) |
228 |
|
{ |
229 |
1 |
String seqString = entry.getSequence(); |
230 |
1 |
if (seqString == null) |
231 |
|
{ |
232 |
0 |
return null; |
233 |
|
} |
234 |
1 |
seqString = seqString.replace(" ", "").replace("\n", "").replace("\t", |
235 |
|
""); |
236 |
1 |
String accession = entry.getAccession(); |
237 |
1 |
SequenceI dna = new Sequence(sourceDb + "|" + accession, seqString); |
238 |
|
|
239 |
1 |
dna.setDescription(entry.getDescription()); |
240 |
1 |
String sequenceVersion = String.valueOf(entry.getVersion().intValue()); |
241 |
1 |
DBRefEntry selfRref = new DBRefEntry(sourceDb, sequenceVersion, |
242 |
|
accession); |
243 |
1 |
dna.addDBRef(selfRref); |
244 |
1 |
selfRref.setMap( |
245 |
|
new Mapping(null, new int[] |
246 |
|
{ 1, dna.getLength() }, new int[] { 1, dna.getLength() }, 1, |
247 |
|
1)); |
248 |
|
|
249 |
|
|
250 |
|
|
251 |
|
|
252 |
1 |
List<XrefType> xrefs = entry.getXref(); |
253 |
1 |
if (xrefs != null) |
254 |
|
{ |
255 |
1 |
for (XrefType xref : xrefs) |
256 |
|
{ |
257 |
2 |
String acc = xref.getId(); |
258 |
2 |
String source = DBRefUtils.getCanonicalName(xref.getDb()); |
259 |
2 |
String version = xref.getSecondaryId(); |
260 |
2 |
if (version == null || "".equals(version)) |
261 |
|
{ |
262 |
1 |
version = "0"; |
263 |
|
} |
264 |
2 |
dna.addDBRef(new DBRefEntry(source, version, acc)); |
265 |
|
} |
266 |
|
} |
267 |
|
|
268 |
1 |
SequenceIdMatcher matcher = new SequenceIdMatcher(peptides); |
269 |
1 |
try |
270 |
|
{ |
271 |
1 |
List<Feature> features = entry.getFeature(); |
272 |
1 |
if (features != null) |
273 |
|
{ |
274 |
1 |
for (Feature feature : features) |
275 |
|
{ |
276 |
3 |
if (FeatureProperties.isCodingFeature(sourceDb, |
277 |
|
feature.getName())) |
278 |
|
{ |
279 |
3 |
parseCodingFeature(entry, feature, sourceDb, dna, peptides, |
280 |
|
matcher); |
281 |
|
} |
282 |
|
} |
283 |
|
} |
284 |
|
} catch (Exception e) |
285 |
|
{ |
286 |
0 |
jalview.bin.Console.errPrintln("EMBL Record Features parsing error!"); |
287 |
0 |
System.err |
288 |
|
.println("Please report the following to help@jalview.org :"); |
289 |
0 |
jalview.bin.Console.errPrintln("EMBL Record " + accession); |
290 |
0 |
jalview.bin.Console |
291 |
|
.errPrintln("Resulted in exception: " + e.getMessage()); |
292 |
0 |
e.printStackTrace(System.err); |
293 |
|
} |
294 |
|
|
295 |
1 |
return dna; |
296 |
|
} |
297 |
|
|
298 |
|
|
299 |
|
|
300 |
|
|
301 |
|
|
302 |
|
@param |
303 |
|
@param |
304 |
|
@param |
305 |
|
@param |
306 |
|
@param |
307 |
|
@param |
308 |
|
|
|
|
| 78.8% |
Uncovered Elements: 39 (184) |
Complexity: 39 |
Complexity Density: 0.31 |
|
309 |
3 |
void parseCodingFeature(EntryType entry, Feature feature, String sourceDb,... |
310 |
|
SequenceI dna, List<SequenceI> peptides, |
311 |
|
SequenceIdMatcher matcher) |
312 |
|
{ |
313 |
3 |
final boolean isEmblCdna = sourceDb.equals(DBRefSource.EMBLCDS); |
314 |
3 |
final String accession = entry.getAccession(); |
315 |
3 |
final String sequenceVersion = entry.getVersion().toString(); |
316 |
|
|
317 |
3 |
int[] exons = getCdsRanges(entry.getAccession(), feature); |
318 |
|
|
319 |
3 |
String translation = null; |
320 |
3 |
String proteinName = ""; |
321 |
3 |
String proteinId = null; |
322 |
3 |
Map<String, String> vals = new Hashtable<>(); |
323 |
|
|
324 |
|
|
325 |
|
|
326 |
|
|
327 |
|
|
328 |
3 |
int codonStart = 1; |
329 |
|
|
330 |
|
|
331 |
|
|
332 |
|
|
333 |
|
|
334 |
3 |
if (feature.getQualifier() != null) |
335 |
|
{ |
336 |
3 |
for (Qualifier q : feature.getQualifier()) |
337 |
|
{ |
338 |
7 |
String qname = q.getName(); |
339 |
7 |
String value = q.getValue(); |
340 |
7 |
value = value == null ? "" |
341 |
|
: value.trim().replace(" ", "").replace("\n", "") |
342 |
|
.replace("\t", ""); |
343 |
7 |
if (qname.equals("translation")) |
344 |
|
{ |
345 |
3 |
translation = value; |
346 |
|
} |
347 |
4 |
else if (qname.equals("protein_id")) |
348 |
|
{ |
349 |
3 |
proteinId = value; |
350 |
|
} |
351 |
1 |
else if (qname.equals("codon_start")) |
352 |
|
{ |
353 |
0 |
try |
354 |
|
{ |
355 |
0 |
codonStart = Integer.parseInt(value.trim()); |
356 |
|
} catch (NumberFormatException e) |
357 |
|
{ |
358 |
0 |
jalview.bin.Console.errPrintln("Invalid codon_start in XML for " |
359 |
|
+ entry.getAccession() + ": " + e.getMessage()); |
360 |
|
} |
361 |
|
} |
362 |
1 |
else if (qname.equals("product")) |
363 |
|
{ |
364 |
|
|
365 |
0 |
proteinName = value; |
366 |
|
} |
367 |
|
else |
368 |
|
{ |
369 |
|
|
370 |
1 |
if (!"".equals(value)) |
371 |
|
{ |
372 |
1 |
vals.put(qname, value); |
373 |
|
} |
374 |
|
} |
375 |
|
} |
376 |
|
} |
377 |
|
|
378 |
3 |
DBRefEntry proteinToEmblProteinRef = null; |
379 |
3 |
exons = MappingUtils.removeStartPositions(codonStart - 1, exons); |
380 |
|
|
381 |
3 |
SequenceI product = null; |
382 |
3 |
Mapping dnaToProteinMapping = null; |
383 |
3 |
if (translation != null && proteinName != null && proteinId != null) |
384 |
|
{ |
385 |
3 |
int translationLength = translation.length(); |
386 |
|
|
387 |
|
|
388 |
|
|
389 |
|
|
390 |
3 |
product = matcher.findIdMatch(proteinId); |
391 |
3 |
if (product == null) |
392 |
|
{ |
393 |
3 |
product = new Sequence(proteinId, translation, 1, |
394 |
|
translationLength); |
395 |
3 |
product.setDescription(((proteinName.length() == 0) |
396 |
|
? "Protein Product from " + sourceDb |
397 |
|
: proteinName)); |
398 |
3 |
peptides.add(product); |
399 |
3 |
matcher.add(product); |
400 |
|
} |
401 |
|
|
402 |
|
|
403 |
|
|
404 |
3 |
if (exons == null || exons.length == 0) |
405 |
|
{ |
406 |
|
|
407 |
|
|
408 |
|
|
409 |
|
|
410 |
0 |
jalview.bin.Console.errPrintln( |
411 |
|
"Implementation Notice: EMBLCDS records not properly supported yet - Making up the CDNA region of this sequence... may be incorrect (" |
412 |
|
+ sourceDb + ":" + entry.getAccession() + ")"); |
413 |
0 |
int dnaLength = dna.getLength(); |
414 |
0 |
if (translationLength * 3 == (1 - codonStart + dnaLength)) |
415 |
|
{ |
416 |
0 |
jalview.bin.Console.errPrintln( |
417 |
|
"Not allowing for additional stop codon at end of cDNA fragment... !"); |
418 |
|
|
419 |
0 |
exons = new int[] { dna.getStart() + (codonStart - 1), |
420 |
|
dna.getEnd() }; |
421 |
0 |
dnaToProteinMapping = new Mapping(product, exons, |
422 |
|
new int[] |
423 |
|
{ 1, translationLength }, 3, 1); |
424 |
|
} |
425 |
0 |
if ((translationLength + 1) * 3 == (1 - codonStart + dnaLength)) |
426 |
|
{ |
427 |
0 |
jalview.bin.Console.errPrintln( |
428 |
|
"Allowing for additional stop codon at end of cDNA fragment... will probably cause an error in VAMSAs!"); |
429 |
0 |
exons = new int[] { dna.getStart() + (codonStart - 1), |
430 |
|
dna.getEnd() - 3 }; |
431 |
0 |
dnaToProteinMapping = new Mapping(product, exons, |
432 |
|
new int[] |
433 |
|
{ 1, translationLength }, 3, 1); |
434 |
|
} |
435 |
|
} |
436 |
|
else |
437 |
|
{ |
438 |
|
|
439 |
|
|
440 |
|
|
441 |
3 |
if (isEmblCdna) |
442 |
|
{ |
443 |
|
|
444 |
|
|
445 |
|
|
446 |
|
|
447 |
|
|
448 |
|
} |
449 |
|
else |
450 |
|
{ |
451 |
|
|
452 |
3 |
int[] exons2 = adjustForProteinLength(translationLength, exons); |
453 |
3 |
dnaToProteinMapping = new Mapping(product, exons2, |
454 |
|
new int[] |
455 |
|
{ 1, translationLength }, 3, 1); |
456 |
3 |
if (product != null) |
457 |
|
{ |
458 |
|
|
459 |
|
|
460 |
|
|
461 |
3 |
DBRefEntry proteinToEmblRef = new DBRefEntry(DBRefSource.EMBL, |
462 |
|
sequenceVersion, proteinId, |
463 |
|
new Mapping(dnaToProteinMapping.getMap().getInverse())); |
464 |
3 |
product.addDBRef(proteinToEmblRef); |
465 |
|
|
466 |
|
|
467 |
|
|
468 |
|
|
469 |
|
|
470 |
3 |
MapList proteinToCdsMapList = new MapList( |
471 |
|
new int[] |
472 |
|
{ 1, translationLength }, |
473 |
|
new int[] |
474 |
|
{ 1 + (codonStart - 1), |
475 |
|
(codonStart - 1) + 3 * translationLength }, |
476 |
|
1, 3); |
477 |
3 |
DBRefEntry proteinToEmblCdsRef = new DBRefEntry( |
478 |
|
DBRefSource.EMBLCDS, sequenceVersion, proteinId, |
479 |
|
new Mapping(proteinToCdsMapList)); |
480 |
3 |
product.addDBRef(proteinToEmblCdsRef); |
481 |
|
|
482 |
|
|
483 |
|
|
484 |
|
|
485 |
3 |
proteinToEmblProteinRef = new DBRefEntry(proteinToEmblCdsRef); |
486 |
3 |
proteinToEmblProteinRef.setSource(DBRefSource.EMBLCDSProduct); |
487 |
3 |
proteinToEmblProteinRef.setMap(null); |
488 |
3 |
product.addDBRef(proteinToEmblProteinRef); |
489 |
|
} |
490 |
|
} |
491 |
|
} |
492 |
|
|
493 |
|
|
494 |
|
|
495 |
|
|
496 |
3 |
String cds = feature.getName(); |
497 |
7 |
for (int xint = 0; exons != null |
498 |
|
&& xint < exons.length - 1; xint += 2) |
499 |
|
{ |
500 |
4 |
int exonStart = exons[xint]; |
501 |
4 |
int exonEnd = exons[xint + 1]; |
502 |
4 |
int begin = Math.min(exonStart, exonEnd); |
503 |
4 |
int end = Math.max(exonStart, exonEnd); |
504 |
4 |
int exonNumber = xint / 2 + 1; |
505 |
4 |
String desc = String.format("Exon %d for protein '%s' EMBLCDS:%s", |
506 |
|
exonNumber, proteinName, proteinId); |
507 |
|
|
508 |
4 |
SequenceFeature sf = makeCdsFeature(cds, desc, begin, end, sourceDb, |
509 |
|
vals); |
510 |
|
|
511 |
4 |
sf.setEnaLocation(feature.getLocation()); |
512 |
4 |
boolean forwardStrand = exonStart <= exonEnd; |
513 |
4 |
sf.setStrand(forwardStrand ? "+" : "-"); |
514 |
4 |
sf.setPhase(String.valueOf(codonStart - 1)); |
515 |
4 |
sf.setValue(FeatureProperties.EXONPOS, exonNumber); |
516 |
4 |
sf.setValue(FeatureProperties.EXONPRODUCT, proteinName); |
517 |
|
|
518 |
4 |
dna.addSequenceFeature(sf); |
519 |
|
} |
520 |
|
} |
521 |
|
|
522 |
|
|
523 |
|
|
524 |
|
|
525 |
3 |
boolean hasUniprotDbref = false; |
526 |
3 |
List<XrefType> xrefs = feature.getXref(); |
527 |
3 |
if (xrefs != null) |
528 |
|
{ |
529 |
3 |
boolean mappingUsed = false; |
530 |
3 |
for (XrefType xref : xrefs) |
531 |
|
{ |
532 |
|
|
533 |
|
|
534 |
|
|
535 |
3 |
String source = DBRefUtils.getCanonicalName(xref.getDb()); |
536 |
3 |
String version = xref.getSecondaryId(); |
537 |
3 |
if (version == null || "".equals(version)) |
538 |
|
{ |
539 |
2 |
version = "0"; |
540 |
|
} |
541 |
3 |
DBRefEntry dbref = new DBRefEntry(source, version, xref.getId()); |
542 |
3 |
DBRefEntry proteinDbRef = new DBRefEntry(source, version, |
543 |
|
dbref.getAccessionId()); |
544 |
3 |
if (source.equals(DBRefSource.UNIPROT)) |
545 |
|
{ |
546 |
3 |
String proteinSeqName = DBRefSource.UNIPROT + "|" |
547 |
|
+ dbref.getAccessionId(); |
548 |
3 |
if (dnaToProteinMapping != null |
549 |
|
&& dnaToProteinMapping.getTo() != null) |
550 |
|
{ |
551 |
3 |
if (mappingUsed) |
552 |
|
{ |
553 |
|
|
554 |
|
|
555 |
|
|
556 |
|
|
557 |
1 |
dnaToProteinMapping = new Mapping(dnaToProteinMapping); |
558 |
|
} |
559 |
3 |
mappingUsed = true; |
560 |
|
|
561 |
|
|
562 |
|
|
563 |
|
|
564 |
|
|
565 |
|
|
566 |
3 |
SequenceI proteinSeq = matcher.findIdMatch(proteinSeqName); |
567 |
3 |
if (proteinSeq == null) |
568 |
|
{ |
569 |
3 |
proteinSeq = new Sequence(proteinSeqName, |
570 |
|
product.getSequenceAsString()); |
571 |
3 |
matcher.add(proteinSeq); |
572 |
3 |
proteinSeq.setDescription(product.getDescription()); |
573 |
3 |
peptides.add(proteinSeq); |
574 |
|
} |
575 |
3 |
dnaToProteinMapping.setTo(proteinSeq); |
576 |
3 |
dnaToProteinMapping.setMappedFromId(proteinId); |
577 |
3 |
proteinSeq.addDBRef(proteinDbRef); |
578 |
3 |
dbref.setMap(dnaToProteinMapping); |
579 |
|
} |
580 |
3 |
hasUniprotDbref = true; |
581 |
|
} |
582 |
3 |
if (product != null) |
583 |
|
{ |
584 |
|
|
585 |
|
|
586 |
|
|
587 |
3 |
DBRefEntry pref = proteinDbRef; |
588 |
3 |
pref.setMap(null); |
589 |
3 |
product.addDBRef(pref); |
590 |
|
|
591 |
3 |
if (dnaToProteinMapping != null) |
592 |
|
{ |
593 |
3 |
Mapping pmap = new Mapping(dna, |
594 |
|
dnaToProteinMapping.getMap().getInverse()); |
595 |
3 |
pref = new DBRefEntry(sourceDb, sequenceVersion, accession); |
596 |
3 |
pref.setMap(pmap); |
597 |
3 |
if (dnaToProteinMapping.getTo() != null) |
598 |
|
{ |
599 |
3 |
dnaToProteinMapping.getTo().addDBRef(pref); |
600 |
|
} |
601 |
|
} |
602 |
|
} |
603 |
3 |
dna.addDBRef(dbref); |
604 |
|
} |
605 |
|
} |
606 |
|
|
607 |
|
|
608 |
|
|
609 |
|
|
610 |
|
|
611 |
|
|
612 |
3 |
if (!hasUniprotDbref && product != null) |
613 |
|
{ |
614 |
1 |
if (proteinToEmblProteinRef == null) |
615 |
|
{ |
616 |
|
|
617 |
0 |
proteinToEmblProteinRef = new DBRefEntry(DBRefSource.EMBLCDSProduct, |
618 |
|
sequenceVersion, proteinId); |
619 |
|
} |
620 |
1 |
product.addDBRef(proteinToEmblProteinRef); |
621 |
|
|
622 |
1 |
if (dnaToProteinMapping != null |
623 |
|
&& dnaToProteinMapping.getTo() != null) |
624 |
|
{ |
625 |
1 |
DBRefEntry dnaToEmblProteinRef = new DBRefEntry( |
626 |
|
DBRefSource.EMBLCDSProduct, sequenceVersion, proteinId); |
627 |
1 |
dnaToEmblProteinRef.setMap(dnaToProteinMapping); |
628 |
1 |
dnaToProteinMapping.setMappedFromId(proteinId); |
629 |
1 |
dna.addDBRef(dnaToEmblProteinRef); |
630 |
|
} |
631 |
|
} |
632 |
|
} |
633 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
634 |
0 |
@Override... |
635 |
|
public boolean isDnaCoding() |
636 |
|
{ |
637 |
0 |
return true; |
638 |
|
} |
639 |
|
|
640 |
|
|
641 |
|
|
642 |
|
|
643 |
|
|
644 |
|
@param |
645 |
|
@param |
646 |
|
@return |
647 |
|
|
|
|
| 60% |
Uncovered Elements: 4 (10) |
Complexity: 3 |
Complexity Density: 0.38 |
|
648 |
4 |
protected int[] getCdsRanges(String accession, Feature feature)... |
649 |
|
{ |
650 |
4 |
String location = feature.getLocation(); |
651 |
4 |
if (location == null) |
652 |
|
{ |
653 |
0 |
return new int[] {}; |
654 |
|
} |
655 |
|
|
656 |
4 |
try |
657 |
|
{ |
658 |
4 |
List<int[]> ranges = DnaUtils.parseLocation(location); |
659 |
4 |
return listToArray(ranges); |
660 |
|
} catch (ParseException e) |
661 |
|
{ |
662 |
0 |
Console.warn( |
663 |
|
String.format("Not parsing inexact CDS location %s in ENA %s", |
664 |
|
location, accession)); |
665 |
0 |
return new int[] {}; |
666 |
|
} |
667 |
|
} |
668 |
|
|
669 |
|
|
670 |
|
|
671 |
|
|
672 |
|
|
673 |
|
@param |
674 |
|
@return |
675 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (6) |
Complexity: 1 |
Complexity Density: 0.17 |
|
676 |
4 |
int[] listToArray(List<int[]> ranges)... |
677 |
|
{ |
678 |
4 |
int[] result = new int[ranges.size() * 2]; |
679 |
4 |
int i = 0; |
680 |
4 |
for (int[] range : ranges) |
681 |
|
{ |
682 |
9 |
result[i++] = range[0]; |
683 |
9 |
result[i++] = range[1]; |
684 |
|
} |
685 |
4 |
return result; |
686 |
|
} |
687 |
|
|
688 |
|
|
689 |
|
|
690 |
|
|
691 |
|
@param |
692 |
|
|
693 |
|
@param |
694 |
|
|
695 |
|
@param |
696 |
|
|
697 |
|
@param |
698 |
|
|
699 |
|
@param |
700 |
|
|
701 |
|
@param |
702 |
|
|
703 |
|
@return |
704 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (7) |
Complexity: 2 |
Complexity Density: 0.4 |
|
705 |
4 |
protected SequenceFeature makeCdsFeature(String type, String desc,... |
706 |
|
int begin, int end, String group, Map<String, String> vals) |
707 |
|
{ |
708 |
4 |
SequenceFeature sf = new SequenceFeature(type, desc, begin, end, group); |
709 |
4 |
if (!vals.isEmpty()) |
710 |
|
{ |
711 |
1 |
for (Entry<String, String> val : vals.entrySet()) |
712 |
|
{ |
713 |
1 |
sf.setValue(val.getKey(), val.getValue()); |
714 |
|
} |
715 |
|
} |
716 |
4 |
return sf; |
717 |
|
} |
718 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
719 |
0 |
@Override... |
720 |
|
public String getAccessionSeparator() |
721 |
|
{ |
722 |
0 |
return null; |
723 |
|
} |
724 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
725 |
0 |
@Override... |
726 |
|
public Regex getAccessionValidator() |
727 |
|
{ |
728 |
0 |
return ACCESSION_REGEX; |
729 |
|
} |
730 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
731 |
0 |
@Override... |
732 |
|
public String getDbVersion() |
733 |
|
{ |
734 |
0 |
return "0"; |
735 |
|
} |
736 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
737 |
0 |
@Override... |
738 |
|
public int getTier() |
739 |
|
{ |
740 |
0 |
return 0; |
741 |
|
} |
742 |
|
|
|
|
| 0% |
Uncovered Elements: 5 (5) |
Complexity: 3 |
Complexity Density: 1 |
|
743 |
0 |
@Override... |
744 |
|
public boolean isValidReference(String accession) |
745 |
|
{ |
746 |
0 |
if (accession == null || accession.length() < 2) |
747 |
|
{ |
748 |
0 |
return false; |
749 |
|
} |
750 |
0 |
return getAccessionValidator().search(accession); |
751 |
|
} |
752 |
|
|
753 |
|
|
754 |
|
|
755 |
|
|
756 |
|
|
757 |
|
@param |
758 |
|
@param |
759 |
|
|
760 |
|
@return |
761 |
|
|
|
|
| 85.4% |
Uncovered Elements: 6 (41) |
Complexity: 9 |
Complexity Density: 0.33 |
|
762 |
9 |
static int[] adjustForProteinLength(int proteinLength, int[] exon)... |
763 |
|
{ |
764 |
9 |
if (proteinLength <= 0 || exon == null) |
765 |
|
{ |
766 |
0 |
return exon; |
767 |
|
} |
768 |
9 |
int expectedCdsLength = proteinLength * 3; |
769 |
9 |
int exonLength = MappingUtils.getLength(Arrays.asList(exon)); |
770 |
|
|
771 |
|
|
772 |
|
|
773 |
|
|
774 |
9 |
if (expectedCdsLength >= exonLength) |
775 |
|
{ |
776 |
5 |
return exon; |
777 |
|
} |
778 |
|
|
779 |
4 |
int origxon[]; |
780 |
4 |
int sxpos = -1; |
781 |
4 |
int endxon = 0; |
782 |
4 |
origxon = new int[exon.length]; |
783 |
4 |
System.arraycopy(exon, 0, origxon, 0, exon.length); |
784 |
4 |
int cdspos = 0; |
785 |
10 |
for (int x = 0; x < exon.length; x += 2) |
786 |
|
{ |
787 |
10 |
cdspos += Math.abs(exon[x + 1] - exon[x]) + 1; |
788 |
10 |
if (expectedCdsLength <= cdspos) |
789 |
|
{ |
790 |
|
|
791 |
4 |
sxpos = x; |
792 |
4 |
if (expectedCdsLength != cdspos) |
793 |
|
{ |
794 |
|
|
795 |
|
|
796 |
|
|
797 |
|
} |
798 |
|
|
799 |
|
|
800 |
|
|
801 |
|
|
802 |
|
|
803 |
4 |
if (exon[x + 1] >= exon[x]) |
804 |
|
{ |
805 |
4 |
endxon = exon[x + 1] - cdspos + expectedCdsLength; |
806 |
|
} |
807 |
|
else |
808 |
|
{ |
809 |
0 |
endxon = exon[x + 1] + cdspos - expectedCdsLength; |
810 |
|
} |
811 |
4 |
break; |
812 |
|
} |
813 |
|
} |
814 |
|
|
815 |
4 |
if (sxpos != -1) |
816 |
|
{ |
817 |
|
|
818 |
4 |
int[] nxon = new int[sxpos + 2]; |
819 |
4 |
System.arraycopy(exon, 0, nxon, 0, sxpos + 2); |
820 |
4 |
nxon[sxpos + 1] = endxon; |
821 |
|
|
822 |
4 |
exon = nxon; |
823 |
|
} |
824 |
4 |
return exon; |
825 |
|
} |
826 |
|
|
827 |
|
} |