Clover icon

Coverage Report

  1. Project Clover database Thu Dec 4 2025 16:11:35 GMT
  2. Package jalview.ws.dbsources

File EmblXmlSource.java

 

Coverage histogram

../../../img/srcFileCovDistChart7.png
30% of files have more coverage

Code metrics

104
256
16
1
840
572
85
0.33
16
16
5.31

Classes

Class Line # Actions
EmblXmlSource 78 256 85
0.68085104 68.1%
 

Contributing tests

No tests hitting this source file were found.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.ws.dbsources;
22   
23    import java.util.Locale;
24   
25    import java.io.File;
26    import java.io.FileInputStream;
27    import java.io.InputStream;
28    import java.text.ParseException;
29    import java.util.ArrayList;
30    import java.util.Arrays;
31    import java.util.Hashtable;
32    import java.util.List;
33    import java.util.Map;
34    import java.util.Map.Entry;
35   
36    import javax.xml.bind.JAXBContext;
37    import javax.xml.bind.JAXBElement;
38    import javax.xml.bind.JAXBException;
39    import javax.xml.stream.FactoryConfigurationError;
40    import javax.xml.stream.XMLInputFactory;
41    import javax.xml.stream.XMLStreamException;
42    import javax.xml.stream.XMLStreamReader;
43   
44   
45    import jalview.analysis.SequenceIdMatcher;
46    import jalview.bin.Console;
47    import jalview.datamodel.Alignment;
48    import jalview.datamodel.AlignmentI;
49    import jalview.datamodel.DBRefEntry;
50    import jalview.datamodel.DBRefSource;
51    import jalview.datamodel.FeatureProperties;
52    import jalview.datamodel.Mapping;
53    import jalview.datamodel.Sequence;
54    import jalview.datamodel.SequenceFeature;
55    import jalview.datamodel.SequenceI;
56    import jalview.util.DBRefUtils;
57    import jalview.util.DnaUtils;
58    import jalview.util.MapList;
59    import jalview.util.MappingUtils;
60    import jalview.util.MessageManager;
61    import jalview.util.Platform;
62    import jalview.ws.ebi.EBIFetchClient;
63    import jalview.xml.binding.embl.EntryType;
64    import jalview.xml.binding.embl.EntryType.Feature;
65    import jalview.xml.binding.embl.EntryType.Feature.Qualifier;
66    import jalview.xml.binding.embl.ROOT;
67    import jalview.xml.binding.embl.XrefType;
68   
69    import com.stevesoft.pat.Regex;
70   
71    /**
72    * Provides XML binding and parsing of EMBL or EMBLCDS records retrieved from
73    * (e.g.) {@code https://www.ebi.ac.uk/ena/data/view/x53828&display=xml}.
74    *
75    * @deprecated endpoint withdrawn August 2020 (JAL-3692), use EmblFlatfileSource
76    */
77    @Deprecated
 
78    public abstract class EmblXmlSource extends EbiFileRetrievedProxy
79    {
80    // TODO: delete class or update this validator for 2.12 style Platform.regex
81    private static final Regex ACCESSION_REGEX = Platform.newRegex("^[A-Z]+[0-9]+");
82   
83    /*
84    * JAL-1856 Embl returns this text for query not found
85    */
86    private static final String EMBL_NOT_FOUND_REPLY = "ERROR 12 No entries found.";
87   
 
/**
 * Default constructor; does nothing beyond calling the superclass
 * constructor.
 */
88  1 toggle public EmblXmlSource()
89    {
90  1 super();
91    }
92   
93    /**
94    * Retrieves and parses an emblxml file, and returns an alignment containing
95    * the parsed sequences, or null if none were found
96    *
97    * @param emprefx
98    * "EMBL" or "EMBLCDS" - anything else will not retrieve emblxml
99    * @param query
100    * @return
101    * @throws Exception
102    */
 
103  0 toggle protected AlignmentI getEmblSequenceRecords(String emprefx, String query)
104    throws Exception
105    {
106  0 startQuery();
107  0 EBIFetchClient dbFetch = new EBIFetchClient();
108  0 File reply;
109  0 try
110    {
111  0 reply = dbFetch.fetchDataAsFile(
112    emprefx.toLowerCase(Locale.ROOT) + ":" + query.trim(), "display=xml",
113    "xml");
114    } catch (Exception e)
115    {
116  0 stopQuery();
117  0 throw new Exception(
118    String.format("EBI EMBL XML retrieval failed for %s:%s",
119    emprefx.toLowerCase(Locale.ROOT), query.trim()),
120    e);
121    }
122  0 return getEmblSequenceRecords(emprefx, query, reply);
123    }
124   
125    /**
126    * parse an emblxml file stored locally
127    *
128    * @param emprefx
129    * either EMBL or EMBLCDS strings are allowed - anything else will
130    * not retrieve emblxml
131    * @param query
132    * @param file
133    * the EMBL XML file containing the results of a query
134    * @return
135    * @throws Exception
136    */
 
137  0 toggle protected AlignmentI getEmblSequenceRecords(String emprefx, String query,
138    File reply) throws Exception
139    {
140  0 List<EntryType> entries = null;
141  0 if (reply != null && reply.exists())
142    {
143  0 file = reply.getAbsolutePath();
144  0 if (reply.length() > EMBL_NOT_FOUND_REPLY.length())
145    {
146  0 InputStream is = new FileInputStream(reply);
147  0 entries = getEmblEntries(is);
148    }
149    }
150   
151    /*
152    * invalid accession gets a reply with no <entry> elements, text content of
153    * EmbFile reads something like (e.g.) this ungrammatical phrase
154    * Entry: <acc> display type is either not supported or entry is not found.
155    */
156  0 AlignmentI al = null;
157  0 List<SequenceI> seqs = new ArrayList<>();
158  0 List<SequenceI> peptides = new ArrayList<>();
159  0 if (entries != null)
160    {
161  0 for (EntryType entry : entries)
162    {
163  0 SequenceI seq = getSequence(emprefx, entry, peptides);
164  0 if (seq != null)
165    {
166  0 seqs.add(seq.deriveSequence());
167    // place DBReferences on dataset and refer
168    }
169    }
170  0 if (!seqs.isEmpty())
171    {
172  0 al = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
173    }
174    else
175    {
176  0 jalview.bin.Console.outPrintln(
177    "No record found for '" + emprefx + ":" + query + "'");
178    }
179    }
180   
181  0 stopQuery();
182  0 return al;
183    }
184   
185    /**
186    * Reads the XML reply from file and unmarshals it to Java objects. Answers a
187    * (possibly empty) list of <code>EntryType</code> objects.
188    *
189    * is
190    *
191    * @return
192    */
 
193  2 toggle List<EntryType> getEmblEntries(InputStream is)
194    {
195  2 List<EntryType> entries = new ArrayList<>();
196  2 try
197    {
198  2 JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.embl");
199  2 XMLStreamReader streamReader = XMLInputFactory.newInstance()
200    .createXMLStreamReader(is);
201  2 javax.xml.bind.Unmarshaller um = jc.createUnmarshaller();
202  2 JAXBElement<ROOT> rootElement = um.unmarshal(streamReader,
203    ROOT.class);
204  2 ROOT root = rootElement.getValue();
205   
206    /*
207    * document root contains either "entry" or "entrySet"
208    */
209  2 if (root == null)
210    {
211  0 return entries;
212    }
213  2 if (root.getEntrySet() != null)
214    {
215  0 entries = root.getEntrySet().getEntry();
216    }
217  2 else if (root.getEntry() != null)
218    {
219  2 entries.add(root.getEntry());
220    }
221    } catch (JAXBException | XMLStreamException
222    | FactoryConfigurationError e)
223    {
224  0 e.printStackTrace();
225    }
226  2 return entries;
227    }
228   
229    /**
230    * A helper method to parse XML data and construct a sequence, with any
231    * available database references and features
232    *
233    * @param emprefx
234    * @param entry
235    * @param peptides
236    * @return
237    */
 
238  1 toggle SequenceI getSequence(String sourceDb, EntryType entry,
239    List<SequenceI> peptides)
240    {
241  1 String seqString = entry.getSequence();
242  1 if (seqString == null)
243    {
244  0 return null;
245    }
246  1 seqString = seqString.replace(" ", "").replace("\n", "").replace("\t",
247    "");
248  1 String accession = entry.getAccession();
249  1 SequenceI dna = new Sequence(sourceDb + "|" + accession, seqString);
250   
251  1 dna.setDescription(entry.getDescription());
252  1 String sequenceVersion = String.valueOf(entry.getVersion().intValue());
253  1 DBRefEntry selfRref = new DBRefEntry(sourceDb, sequenceVersion,
254    accession);
255  1 dna.addDBRef(selfRref);
256  1 selfRref.setMap(
257    new Mapping(null, new int[]
258    { 1, dna.getLength() }, new int[] { 1, dna.getLength() }, 1,
259    1));
260   
261    /*
262    * add db references
263    */
264  1 List<XrefType> xrefs = entry.getXref();
265  1 if (xrefs != null)
266    {
267  1 for (XrefType xref : xrefs)
268    {
269  2 String acc = xref.getId();
270  2 String source = DBRefUtils.getCanonicalName(xref.getDb());
271  2 String version = xref.getSecondaryId();
272  2 if (version == null || "".equals(version))
273    {
274  1 version = "0";
275    }
276  2 dna.addDBRef(new DBRefEntry(source, version, acc));
277    }
278    }
279   
280  1 SequenceIdMatcher matcher = new SequenceIdMatcher(peptides);
281  1 try
282    {
283  1 List<Feature> features = entry.getFeature();
284  1 if (features != null)
285    {
286  1 for (Feature feature : features)
287    {
288  3 if (FeatureProperties.isCodingFeature(sourceDb,
289    feature.getName()))
290    {
291  3 parseCodingFeature(entry, feature, sourceDb, dna, peptides,
292    matcher);
293    }
294    }
295    }
296    } catch (Exception e)
297    {
298  0 jalview.bin.Console.errPrintln("EMBL Record Features parsing error!");
299  0 System.err
300    .println("Please report the following to help@jalview.org :");
301  0 jalview.bin.Console.errPrintln("EMBL Record " + accession);
302  0 jalview.bin.Console
303    .errPrintln("Resulted in exception: " + e.getMessage());
304  0 e.printStackTrace(System.err);
305    }
306   
307  1 return dna;
308    }
309   
310    /**
311    * Extracts coding region and product from a CDS feature and decorates it with
312    * annotations. In outline: reads the feature's qualifiers (translation,
 * protein_id, codon_start, product; any others are kept as miscellaneous
 * feature values), finds or creates the protein product sequence, builds
 * the dna-to-protein mapping and cross-references, adds one sequence
 * feature per exon range to the dna sequence, and finally copies the
 * feature's own xrefs to the dna and protein sequences.
313    *
314    * @param entry
 *          the EMBL entry; its accession and version are read
315    * @param feature
 *          the coding feature; its qualifiers, location and xrefs are read
316    * @param sourceDb
 *          the source database name; compared with DBRefSource.EMBLCDS, and
 *          used as the feature group and in the dna DBRefEntry
317    * @param dna
 *          the nucleotide sequence that receives features and db references
318    * @param peptides
 *          list to which any newly constructed protein sequence is added
319    * @param matcher
 *          used to look up an existing protein by id; newly constructed
 *          proteins are added to it
320    */
 
321  3 toggle void parseCodingFeature(EntryType entry, Feature feature, String sourceDb,
322    SequenceI dna, List<SequenceI> peptides,
323    SequenceIdMatcher matcher)
324    {
325  3 final boolean isEmblCdna = sourceDb.equals(DBRefSource.EMBLCDS);
326  3 final String accession = entry.getAccession();
327  3 final String sequenceVersion = entry.getVersion().toString();
328   
329  3 int[] exons = getCdsRanges(entry.getAccession(), feature);
330   
331  3 String translation = null;
332  3 String proteinName = "";
333  3 String proteinId = null;
334  3 Map<String, String> vals = new Hashtable<>();
335   
336    /*
337    * codon_start 1/2/3 in EMBL corresponds to phase 0/1/2 in CDS
338    * (phase is required for CDS features in GFF3 format)
339    */
340  3 int codonStart = 1;
341   
342    /*
343    * parse qualifiers, saving protein translation, protein id,
344    * codon start position, product (name), and 'other values'
345    */
346  3 if (feature.getQualifier() != null)
347    {
348  3 for (Qualifier q : feature.getQualifier())
349    {
350  7 String qname = q.getName();
351  7 String value = q.getValue();
// NOTE: every space, newline and tab is removed from the value, so a
// multi-word value (e.g. a product name) loses its internal spaces
352  7 value = value == null ? ""
353    : value.trim().replace(" ", "").replace("\n", "")
354    .replace("\t", "");
355  7 if (qname.equals("translation"))
356    {
357  3 translation = value;
358    }
359  4 else if (qname.equals("protein_id"))
360    {
361  3 proteinId = value;
362    }
363  1 else if (qname.equals("codon_start"))
364    {
365  0 try
366    {
367  0 codonStart = Integer.parseInt(value.trim());
368    } catch (NumberFormatException e)
369    {
// an unparseable codon_start is reported and the default of 1 is kept
370  0 jalview.bin.Console.errPrintln("Invalid codon_start in XML for "
371    + entry.getAccession() + ": " + e.getMessage());
372    }
373    }
374  1 else if (qname.equals("product"))
375    {
376    // sometimes name is returned e.g. for V00488
377  0 proteinName = value;
378    }
379    else
380    {
381    // throw anything else into the additional properties hash
382  1 if (!"".equals(value))
383    {
384  1 vals.put(qname, value);
385    }
386    }
387    }
388    }
389   
390  3 DBRefEntry proteinToEmblProteinRef = null;
// drop the first (codonStart - 1) positions from the exon ranges
391  3 exons = MappingUtils.removeStartPositions(codonStart - 1, exons);
392   
393  3 SequenceI product = null;
394  3 Mapping dnaToProteinMapping = null;
// NOTE(review): proteinName starts as "" and is only reassigned from a
// non-null value above, so the proteinName != null test is always true
395  3 if (translation != null && proteinName != null && proteinId != null)
396    {
397  3 int translationLength = translation.length();
398   
399    /*
400    * look for product in peptides list, if not found, add it
401    */
402  3 product = matcher.findIdMatch(proteinId);
403  3 if (product == null)
404    {
405  3 product = new Sequence(proteinId, translation, 1,
406    translationLength);
407  3 product.setDescription(((proteinName.length() == 0)
408    ? "Protein Product from " + sourceDb
409    : proteinName));
410  3 peptides.add(product);
411  3 matcher.add(product);
412    }
413   
414    // we have everything - create the mapping and perhaps the protein
415    // sequence
416  3 if (exons == null || exons.length == 0)
417    {
418    /*
419    * workaround until we handle dna location for CDS sequence
420    * e.g. location="X53828.1:60..1058" correctly
421    */
422  0 jalview.bin.Console.errPrintln(
423    "Implementation Notice: EMBLCDS records not properly supported yet - Making up the CDNA region of this sequence... may be incorrect ("
424    + sourceDb + ":" + entry.getAccession() + ")");
425  0 int dnaLength = dna.getLength();
// case 1: dna (after the codon start offset) is exactly 3 x the
// translation length, i.e. no trailing stop codon
426  0 if (translationLength * 3 == (1 - codonStart + dnaLength))
427    {
428  0 jalview.bin.Console.errPrintln(
429    "Not allowing for additional stop codon at end of cDNA fragment... !");
430    // this might occur for CDS sequences where no features are marked
431  0 exons = new int[] { dna.getStart() + (codonStart - 1),
432    dna.getEnd() };
433  0 dnaToProteinMapping = new Mapping(product, exons,
434    new int[]
435    { 1, translationLength }, 3, 1);
436    }
// case 2: dna is one codon longer than the translation requires; the
// final three bases are left out of the mapped range
437  0 if ((translationLength + 1) * 3 == (1 - codonStart + dnaLength))
438    {
439  0 jalview.bin.Console.errPrintln(
440    "Allowing for additional stop codon at end of cDNA fragment... will probably cause an error in VAMSAs!");
441  0 exons = new int[] { dna.getStart() + (codonStart - 1),
442    dna.getEnd() - 3 };
443  0 dnaToProteinMapping = new Mapping(product, exons,
444    new int[]
445    { 1, translationLength }, 3, 1);
446    }
447    }
448    else
449    {
450    // Trim the exon mapping if necessary - the given product may only be a
451    // fragment of a larger protein. (EMBL:AY043181 is an example)
452   
453  3 if (isEmblCdna)
454    {
455    // TODO: Add a DbRef back to the parent EMBL sequence with the exon
456    // map
457    // if given a dataset reference, search dataset for parent EMBL
458    // sequence if it exists and set its map
459    // make a new feature annotating the coding contig
460    }
461    else
462    {
463    // final product length truncation check
464  3 int[] cdsRanges = adjustForProteinLength(translationLength,
465    exons);
466  3 dnaToProteinMapping = new Mapping(product, cdsRanges,
467    new int[]
468    { 1, translationLength }, 3, 1);
// product was found or created above, so this check always passes here
469  3 if (product != null)
470    {
471    /*
472    * make xref with mapping from protein to EMBL dna
473    */
474  3 DBRefEntry proteinToEmblRef = new DBRefEntry(DBRefSource.EMBL,
475    sequenceVersion, proteinId,
476    new Mapping(dnaToProteinMapping.getMap().getInverse()));
477  3 product.addDBRef(proteinToEmblRef);
478   
479    /*
480    * make xref from protein to EMBLCDS; we assume here that the
481    * CDS sequence version is same as dna sequence (?!)
482    */
483  3 MapList proteinToCdsMapList = new MapList(
484    new int[]
485    { 1, translationLength },
486    new int[]
487    { 1 + (codonStart - 1),
488    (codonStart - 1) + 3 * translationLength },
489    1, 3);
490  3 DBRefEntry proteinToEmblCdsRef = new DBRefEntry(
491    DBRefSource.EMBLCDS, sequenceVersion, proteinId,
492    new Mapping(proteinToCdsMapList));
493  3 product.addDBRef(proteinToEmblCdsRef);
494   
495    /*
496    * make 'direct' xref from protein to EMBLCDSPROTEIN
497    */
498  3 proteinToEmblProteinRef = new DBRefEntry(proteinToEmblCdsRef);
499  3 proteinToEmblProteinRef.setSource(DBRefSource.EMBLCDSProduct);
500  3 proteinToEmblProteinRef.setMap(null);
501  3 product.addDBRef(proteinToEmblProteinRef);
502    }
503    }
504    }
505   
506    /*
507    * add cds features to dna sequence
508    */
509  3 String cds = feature.getName(); // "CDS"
// exons holds [start, end, start, end...]; step through it a pair at a time
510  7 for (int xint = 0; exons != null
511    && xint < exons.length - 1; xint += 2)
512    {
513  4 int exonStart = exons[xint];
514  4 int exonEnd = exons[xint + 1];
515  4 int begin = Math.min(exonStart, exonEnd);
516  4 int end = Math.max(exonStart, exonEnd);
517  4 int exonNumber = xint / 2 + 1;
518  4 String desc = String.format("Exon %d for protein '%s' EMBLCDS:%s",
519    exonNumber, proteinName, proteinId);
520   
521  4 SequenceFeature sf = makeCdsFeature(cds, desc, begin, end, sourceDb,
522    vals);
523   
524  4 sf.setEnaLocation(feature.getLocation());
// a range whose start exceeds its end is on the reverse strand
525  4 boolean forwardStrand = exonStart <= exonEnd;
526  4 sf.setStrand(forwardStrand ? "+" : "-");
527  4 sf.setPhase(String.valueOf(codonStart - 1));
528  4 sf.setValue(FeatureProperties.EXONPOS, exonNumber);
529  4 sf.setValue(FeatureProperties.EXONPRODUCT, proteinName);
530   
531  4 dna.addSequenceFeature(sf);
532    }
533    }
534   
535    /*
536    * add feature dbRefs to sequence, and mappings for Uniprot xrefs
537    */
538  3 boolean hasUniprotDbref = false;
539  3 List<XrefType> xrefs = feature.getXref();
540  3 if (xrefs != null)
541    {
542  3 boolean mappingUsed = false;
543  3 for (XrefType xref : xrefs)
544    {
545    /*
546    * ensure UniProtKB/Swiss-Prot converted to UNIPROT
547    */
548  3 String source = DBRefUtils.getCanonicalName(xref.getDb());
549  3 String version = xref.getSecondaryId();
550  3 if (version == null || "".equals(version))
551    {
552  2 version = "0";
553    }
// dbref is added to the dna sequence; proteinDbRef (same source, version
// and accession) is the copy added to protein sequences
554  3 DBRefEntry dbref = new DBRefEntry(source, version, xref.getId());
555  3 DBRefEntry proteinDbRef = new DBRefEntry(source, version,
556    dbref.getAccessionId());
557  3 if (source.equals(DBRefSource.UNIPROT))
558    {
559  3 String proteinSeqName = DBRefSource.UNIPROT + "|"
560    + dbref.getAccessionId();
561  3 if (dnaToProteinMapping != null
562    && dnaToProteinMapping.getTo() != null)
563    {
564  3 if (mappingUsed)
565    {
566    /*
567    * two or more Uniprot xrefs for the same CDS -
568    * each needs a distinct Mapping (as to a different sequence)
569    */
570  1 dnaToProteinMapping = new Mapping(dnaToProteinMapping);
571    }
572  3 mappingUsed = true;
573   
574    /*
575    * try to locate the protein mapped to (possibly by a
576    * previous CDS feature); if not found, construct it from
577    * the EMBL translation
578    */
579  3 SequenceI proteinSeq = matcher.findIdMatch(proteinSeqName);
580  3 if (proteinSeq == null)
581    {
582  3 proteinSeq = new Sequence(proteinSeqName,
583    product.getSequenceAsString());
584  3 matcher.add(proteinSeq);
585  3 proteinSeq.setDescription(product.getDescription());
586  3 peptides.add(proteinSeq);
587    }
588  3 dnaToProteinMapping.setTo(proteinSeq);
589  3 dnaToProteinMapping.setMappedFromId(proteinId);
590  3 proteinSeq.addDBRef(proteinDbRef);
591  3 dbref.setMap(dnaToProteinMapping);
592    }
593  3 hasUniprotDbref = true;
594    }
595  3 if (product != null)
596    {
597    /*
598    * copy feature dbref to our protein product
599    */
600  3 DBRefEntry pref = proteinDbRef;
601  3 pref.setMap(null); // reference is direct
602  3 product.addDBRef(pref);
603    // Add converse mapping reference
604  3 if (dnaToProteinMapping != null)
605    {
606  3 Mapping pmap = new Mapping(dna,
607    dnaToProteinMapping.getMap().getInverse());
608  3 pref = new DBRefEntry(sourceDb, sequenceVersion, accession);
609  3 pref.setMap(pmap);
610  3 if (dnaToProteinMapping.getTo() != null)
611    {
612  3 dnaToProteinMapping.getTo().addDBRef(pref);
613    }
614    }
615    }
616  3 dna.addDBRef(dbref);
617    }
618    }
619   
620    /*
621    * if we have a product (translation) but no explicit Uniprot dbref
622    * (example: EMBL AAFI02000057 protein_id EAL65544.1)
623    * then construct mappings to an assumed EMBLCDSPROTEIN accession
624    */
625  3 if (!hasUniprotDbref && product != null)
626    {
627  1 if (proteinToEmblProteinRef == null)
628    {
629    // assuming CDSPROTEIN sequence version = dna version (?!)
630  0 proteinToEmblProteinRef = new DBRefEntry(DBRefSource.EMBLCDSProduct,
631    sequenceVersion, proteinId);
632    }
633  1 product.addDBRef(proteinToEmblProteinRef);
634   
635  1 if (dnaToProteinMapping != null
636    && dnaToProteinMapping.getTo() != null)
637    {
638  1 DBRefEntry dnaToEmblProteinRef = new DBRefEntry(
639    DBRefSource.EMBLCDSProduct, sequenceVersion, proteinId);
640  1 dnaToEmblProteinRef.setMap(dnaToProteinMapping);
641  1 dnaToProteinMapping.setMappedFromId(proteinId);
642  1 dna.addDBRef(dnaToEmblProteinRef);
643    }
644    }
645    }
646   
 
/**
 * Always answers true.
 */
647  0 toggle @Override
648    public boolean isDnaCoding()
649    {
650  0 return true;
651    }
652   
653    /**
654    * Returns the CDS positions as a single array of [start, end, start, end...]
655    * positions. If on the reverse strand, these will be in descending order.
656    *
657    * @param accession
658    * @param feature
659    * @return
660    */
 
661  4 toggle protected int[] getCdsRanges(String accession, Feature feature)
662    {
663  4 String location = feature.getLocation();
664  4 if (location == null)
665    {
666  0 return new int[] {};
667    }
668   
669  4 try
670    {
671  4 List<int[]> ranges = DnaUtils.parseLocation(location);
672  4 return listToArray(ranges);
673    } catch (ParseException e)
674    {
675  0 Console.warn(
676    String.format("Not parsing inexact CDS location %s in ENA %s",
677    location, accession));
678  0 return new int[] {};
679    }
680    }
681   
682    /**
683    * Converts a list of [start, end] ranges to a single array of [start, end,
684    * start, end ...]
685    *
686    * @param ranges
687    * @return
688    */
 
689  4 toggle int[] listToArray(List<int[]> ranges)
690    {
691  4 int[] result = new int[ranges.size() * 2];
692  4 int i = 0;
693  4 for (int[] range : ranges)
694    {
695  9 result[i++] = range[0];
696  9 result[i++] = range[1];
697    }
698  4 return result;
699    }
700   
701    /**
702    * Helper method to construct a SequenceFeature for one cds range
703    *
704    * @param type
705    * feature type ("CDS")
706    * @param desc
707    * description
708    * @param begin
709    * start position
710    * @param end
711    * end position
712    * @param group
713    * feature group
714    * @param vals
715    * map of 'miscellaneous values' for feature
716    * @return
717    */
 
718  4 toggle protected SequenceFeature makeCdsFeature(String type, String desc,
719    int begin, int end, String group, Map<String, String> vals)
720    {
721  4 SequenceFeature sf = new SequenceFeature(type, desc, begin, end, group);
722  4 if (!vals.isEmpty())
723    {
724  1 for (Entry<String, String> val : vals.entrySet())
725    {
726  1 sf.setValue(val.getKey(), val.getValue());
727    }
728    }
729  4 return sf;
730    }
731   
 
/**
 * Always answers null.
 */
732  0 toggle @Override
733    public String getAccessionSeparator()
734    {
735  0 return null;
736    }
737   
 
/**
 * Answers the shared ACCESSION_REGEX pattern (one or more upper case
 * letters followed by one or more digits, anchored at the start).
 */
738  0 toggle @Override
739    public Regex getAccessionValidator()
740    {
741  0 return ACCESSION_REGEX;
742    }
743   
 
/**
 * Always answers the string "0".
 */
744  0 toggle @Override
745    public String getDbVersion()
746    {
747  0 return "0";
748    }
749   
 
/**
 * Always answers 0.
 */
750  0 toggle @Override
751    public int getTier()
752    {
753  0 return 0;
754    }
755   
 
756  0 toggle @Override
757    public boolean isValidReference(String accession)
758    {
759  0 if (accession == null || accession.length() < 2)
760    {
761  0 return false;
762    }
763  0 return getAccessionValidator().search(accession);
764    }
765   
766    /**
767    * Truncates (if necessary) the exon intervals to match 3 times the length of
768    * the protein (including truncation for stop codon included in exon)
769    *
770    * @param proteinLength
771    * @param exon
772    * an array of [start, end, start, end...] intervals
773    * @return the same array (if unchanged) or a truncated copy
774    */
 
775  9 toggle static int[] adjustForProteinLength(int proteinLength, int[] exon)
776    {
777  9 if (proteinLength <= 0 || exon == null)
778    {
779  0 return exon;
780    }
781  9 int expectedCdsLength = proteinLength * 3;
782  9 int exonLength = MappingUtils.getLength(Arrays.asList(exon));
783   
784    /*
785    * if exon length matches protein, or is shorter, then leave it unchanged
786    */
787  9 if (expectedCdsLength >= exonLength)
788    {
789  5 return exon;
790    }
791   
792  4 int origxon[];
793  4 int sxpos = -1;
794  4 int endxon = 0;
795  4 origxon = new int[exon.length];
796  4 System.arraycopy(exon, 0, origxon, 0, exon.length);
797  4 int cdspos = 0;
798  10 for (int x = 0; x < exon.length; x += 2)
799    {
800  10 cdspos += Math.abs(exon[x + 1] - exon[x]) + 1;
801  10 if (expectedCdsLength <= cdspos)
802    {
803    // advanced beyond last codon.
804  4 sxpos = x;
805  4 if (expectedCdsLength != cdspos)
806    {
807    // System.err
808    // .println("Truncating final exon interval on region by "
809    // + (cdspos - cdslength));
810    }
811   
812    /*
813    * shrink the final exon - reduce end position if forward
814    * strand, increase it if reverse
815    */
816  4 if (exon[x + 1] >= exon[x])
817    {
818  4 endxon = exon[x + 1] - cdspos + expectedCdsLength;
819    }
820    else
821    {
822  0 endxon = exon[x + 1] + cdspos - expectedCdsLength;
823    }
824  4 break;
825    }
826    }
827   
828  4 if (sxpos != -1)
829    {
830    // and trim the exon interval set if necessary
831  4 int[] nxon = new int[sxpos + 2];
832  4 System.arraycopy(exon, 0, nxon, 0, sxpos + 2);
833  4 nxon[sxpos + 1] = endxon; // update the end boundary for the new exon
834    // set
835  4 exon = nxon;
836    }
837  4 return exon;
838    }
839   
840    }