File EmblEntry.java

Branches:

Statements:

232

Methods:

Classes:

LOC:

872

NCLOC:

567

Total complexity:

Complexity density:

0.41

Statements/Method:

5.66

Methods/Class:

Average method complexity:

2.29

Classes

Class	Line #	Total Statements	Complexity	Uncovered Elements	TOTAL Coverage	Actions
EmblEntry	55	232	94	81	77.2%

Class EmblEntry

Class EmblEntry	Line # 55	Total Statements 232	Complexity 94	Uncovered Elements 81	TOTAL Coverage 77.2%
Method	Line #	Total Statements	Complexity	Uncovered Elements	Coverage
getAccession() : String	96	1	1	0	100%
setAccession(String) : void	105	1	1	0	100%
getDbRefs() : Vector<DBRefEntry>	113	1	1	0	100%
setDbRefs(Vector<DBRefEntry>) : void	122	1	1	0	100%
getFeatures() : Vector<EmblFeature>	130	1	1	0	100%
setFeatures(Vector<EmblFeature>) : void	139	1	1	0	100%
getKeywords() : Vector<String>	147	1	1	0	100%
setKeywords(Vector<String>) : void	156	1	1	0	100%
getSequence() : EmblSequence	164	1	1	0	100%
setSequence(EmblSequence) : void	173	1	1	0	100%
getSequence(String,List<SequenceI>) : SequenceI	186	22	5	28	0%
makeSequence(String) : SequenceI	243	5	2	3	57.1%
parseCodingFeature(EmblFeature,String,SequenceI,List<SequenceI>,SequenceIdMatcher) : void	271	119	36	37	78.6%
makeCdsFeature(String,String,int,int,String,Map<String, String>) : SequenceFeature	607	12	3	2	87.5%
getCdsRanges(EmblFeature) : int[]	637	7	3	4	55.6%
listToArray(List<int[]>) : int[]	664	6	1	0	100%
adjustForProteinLength(int,int[]) : int[]	686	27	10	6	85.4%
getSequenceVersion() : String	753	1	1	0	100%
setSequenceVersion(String) : void	758	1	1	0	100%
getSequenceLength() : String	763	1	1	0	100%
setSequenceLength(String) : void	768	1	1	0	100%
getEntryVersion() : String	773	1	1	0	100%
setEntryVersion(String) : void	778	1	1	0	100%
getMoleculeType() : String	783	1	1	0	100%
setMoleculeType(String) : void	788	1	1	0	100%
getTopology() : String	793	1	1	0	100%
setTopology(String) : void	798	1	1	0	100%
getTaxonomicDivision() : String	803	1	1	0	100%
setTaxonomicDivision(String) : void	808	1	1	0	100%
getDescription() : String	813	1	1	0	100%
setDescription(String) : void	818	1	1	0	100%
getFirstPublicDate() : String	823	1	1	0	100%
setFirstPublicDate(String) : void	828	1	1	0	100%
getFirstPublicRelease() : String	833	1	1	0	100%
setFirstPublicRelease(String) : void	838	1	1	0	100%
getLastUpdatedDate() : String	843	1	1	0	100%
setLastUpdatedDate(String) : void	848	1	1	0	100%
getLastUpdatedRelease() : String	853	1	1	0	100%
setLastUpdatedRelease(String) : void	858	1	1	0	100%
getDataClass() : String	863	1	1	0	100%
setDataClass(String) : void	868	1	1	0	100%

Contributing tests

This file is covered by 4 tests.

Contributing tests

Test contribution	Test	Result
0.69295776	jalview.datamodel.xdb.embl.EmblEntryTest.testParseCodingFeature	PASS
0.1915493	jalview.datamodel.xdb.embl.EmblFileTest.testGetEmblFile	PASS
0.10140845	jalview.datamodel.xdb.embl.EmblEntryTest.testAdjustForProteinLength	PASS
0.03661972	jalview.datamodel.xdb.embl.EmblEntryTest.testGetCdsRanges	PASS

Source view

/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */

package jalview.datamodel.xdb.embl;

import jalview.analysis.SequenceIdMatcher;

import jalview.bin.Cache;

import jalview.datamodel.DBRefEntry;

import jalview.datamodel.DBRefSource;

import jalview.datamodel.FeatureProperties;

import jalview.datamodel.Mapping;

import jalview.datamodel.Sequence;

import jalview.datamodel.SequenceFeature;

import jalview.datamodel.SequenceI;

import jalview.util.DBRefUtils;

import jalview.util.DnaUtils;

import jalview.util.MapList;

import jalview.util.MappingUtils;

import jalview.util.StringUtils;

import java.text.ParseException;

import java.util.Arrays;

import java.util.Hashtable;

import java.util.List;

import java.util.Map;

import java.util.Map.Entry;

import java.util.Vector;

import java.util.regex.Pattern;

/**
 * Data model for one entry returned from an EMBL query, as marshalled by a
 * Castor binding file
 * 
 * For example: http://www.ebi.ac.uk/ena/data/view/J03321&display=xml
 * 
 * @see embl_mapping.xml
 */

public class EmblEntry

{

// Pre-compiled single-space pattern; parseCodingFeature uses it to strip
// spaces from the value of a "translation" qualifier
private static final Pattern SPACE_PATTERN = Pattern.compile(" ");

// Entry attributes. Each has a getter/setter pair in this class; the class
// comment says instances are marshalled by a Castor binding file, so these are
// presumably populated through those setters (confirm against the mapping).

String accession;

String entryVersion;

String sequenceVersion;

String dataClass;

String moleculeType;

String topology;

// held as a String, not a number (see getSequenceLength)
String sequenceLength;

String taxonomicDivision;

String description;

String firstPublicDate;

String firstPublicRelease;

String lastUpdatedDate;

String lastUpdatedRelease;

Vector<String> keywords;

// database cross-references; getSequence(String, List) canonicalises their
// source names and copies them onto the dna sequence
Vector<DBRefEntry> dbRefs;

// features of the entry; getSequence(String, List) parses the coding ones
Vector<EmblFeature> features;

// the sequence data; makeSequence returns null when this is null
EmblSequence sequence;

/**

* @return the accession

public String getAccession()

{

return accession;

}

/**

* @param accession

* the accession to set

104

105

public void setAccession(String accession)

106

{

107

this.accession = accession;

}

/**

* @return the dbRefs

public Vector<DBRefEntry> getDbRefs()

{

return dbRefs;

}

/**

* @param dbRefs

* the dbRefs to set

public void setDbRefs(Vector<DBRefEntry> dbRefs)

123

{

124

this.dbRefs = dbRefs;

}

/**

* @return the features

129

130

public Vector<EmblFeature> getFeatures()

{

return features;

}

/**

* @param features

* the features to set

138

139

public void setFeatures(Vector<EmblFeature> features)

140

{

141

this.features = features;

}

/**

* @return the keywords

146

147

public Vector<String> getKeywords()

{

return keywords;

}

/**

* @param keywords

* the keywords to set

155

156

public void setKeywords(Vector<String> keywords)

157

{

158

this.keywords = keywords;

}

/**

* @return the sequence

163

164

public EmblSequence getSequence()

{

return sequence;

}

/**

* @param sequence

* the sequence to set

172

173

public void setSequence(EmblSequence sequence)

174

{

175

this.sequence = sequence;

}

/**

* Recover annotated sequences from EMBL file

* @param sourceDb

* @param peptides

* a list of protein products found so far (to add to)

184

* @return dna dataset sequence with DBRefs and features

185

186

public SequenceI getSequence(String sourceDb, List<SequenceI> peptides)

187

{

188

SequenceI dna = makeSequence(sourceDb);

if (dna == null)

{

return null;

}

dna.setDescription(description);

194

DBRefEntry retrievedref = new DBRefEntry(sourceDb, getSequenceVersion(),

195

accession);

196

dna.addDBRef(retrievedref);

197

// add map to indicate the sequence is a valid coordinate frame for the

198

// dbref

199

retrievedref

200

.setMap(new Mapping(null, new int[]

201

{ 1, dna.getLength() }, new int[] { 1, dna.getLength() }, 1,

1));

* transform EMBL Database refs to canonical form

if (dbRefs != null)

{

for (DBRefEntry dbref : dbRefs)

210

{

211

dbref.setSource(DBRefUtils.getCanonicalName(dbref.getSource()));

dna.addDBRef(dbref);

}

}

SequenceIdMatcher matcher = new SequenceIdMatcher(peptides);

217

try

218

{

219

for (EmblFeature feature : features)

220

{

221

if (FeatureProperties.isCodingFeature(sourceDb, feature.getName()))

222

{

223

parseCodingFeature(feature, sourceDb, dna, peptides, matcher);

224

}

225

}

226

} catch (Exception e)

227

{

228

System.err.println("EMBL Record Features parsing error!");

229

System.err

230

.println("Please report the following to help@jalview.org :");

231

System.err.println("EMBL Record " + accession);

232

System.err.println("Resulted in exception: " + e.getMessage());

233

e.printStackTrace(System.err);

}

return dna;

}

/**

* @param sourceDb

* @return

SequenceI makeSequence(String sourceDb)

244

{

245

if (sequence == null)

246

{

247

System.err.println(

248

"No sequence was returned for ENA accession " + accession);

249

return null;

250

}

251

SequenceI dna = new Sequence(sourceDb + "|" + accession,

252

sequence.getSequence());

return dna;

}

/**

* Extracts coding region and product from a CDS feature and properly decorate

258

* it with annotations.

* @param feature

* coding feature

* @param sourceDb

* source database for the EMBLXML

264

* @param dna

265

* parent dna sequence for this record

266

* @param peptides

267

* list of protein product sequences for Embl entry

268

* @param matcher

269

* helper to match xrefs in already retrieved sequences

270

271

void parseCodingFeature(EmblFeature feature, String sourceDb,

272

SequenceI dna, List<SequenceI> peptides,

273

SequenceIdMatcher matcher)

274

{

275

boolean isEmblCdna = sourceDb.equals(DBRefSource.EMBLCDS);

276

277

int[] exons = getCdsRanges(feature);

278

279

String translation = null;

280

String proteinName = "";

281

String proteinId = null;

282

Map<String, String> vals = new Hashtable<>();

283

284

285

* codon_start 1/2/3 in EMBL corresponds to phase 0/1/2 in CDS

286

* (phase is required for CDS features in GFF3 format)

int codonStart = 1;

* parse qualifiers, saving protein translation, protein id,

292

* codon start position, product (name), and 'other values'

293

294

if (feature.getQualifiers() != null)

295

{

296

for (Qualifier q : feature.getQualifiers())

297

{

298

String qname = q.getName();

299

if (qname.equals("translation"))

300

{

301

// remove all spaces (precompiled String.replaceAll(" ", ""))

302

translation = SPACE_PATTERN.matcher(q.getValues()[0])

303

.replaceAll("");

304

}

305

else if (qname.equals("protein_id"))

306

{

307

proteinId = q.getValues()[0].trim();

308

}

309

else if (qname.equals("codon_start"))

{

try

{

codonStart = Integer.parseInt(q.getValues()[0].trim());

314

} catch (NumberFormatException e)

315

{

316

System.err.println("Invalid codon_start in XML for " + accession

317

+ ": " + e.getMessage());

318

}

319

}

320

else if (qname.equals("product"))

321

{

322

// sometimes name is returned e.g. for V00488

323

proteinName = q.getValues()[0].trim();

}

else

{

// throw anything else into the additional properties hash

328

String[] qvals = q.getValues();

329

if (qvals != null)

330

{

331

String commaSeparated = StringUtils.arrayToSeparatorList(qvals,

332

",");

333

vals.put(qname, commaSeparated);

}

}

}

}

DBRefEntry proteinToEmblProteinRef = null;

340

exons = MappingUtils.removeStartPositions(codonStart - 1, exons);

341

342

SequenceI product = null;

343

Mapping dnaToProteinMapping = null;

344

if (translation != null && proteinName != null && proteinId != null)

345

{

346

int translationLength = translation.length();

347

348

349

* look for product in peptides list, if not found, add it

350

351

product = matcher.findIdMatch(proteinId);

352

if (product == null)

353

{

354

product = new Sequence(proteinId, translation, 1,

355

translationLength);

356

product.setDescription(((proteinName.length() == 0)

357

? "Protein Product from " + sourceDb

358

: proteinName));

359

peptides.add(product);

360

matcher.add(product);

361

}

362

363

// we have everything - create the mapping and perhaps the protein

364

// sequence

365

if (exons == null || exons.length == 0)

366

{

367

368

* workaround until we handle dna location for CDS sequence

369

* e.g. location="X53828.1:60..1058" correctly

370

371

System.err.println(

372

"Implementation Notice: EMBLCDS records not properly supported yet - Making up the CDNA region of this sequence... may be incorrect ("

373

+ sourceDb + ":" + getAccession() + ")");

374

int dnaLength = dna.getLength();

375

if (translationLength * 3 == (1 - codonStart + dnaLength))

376

{

377

System.err.println(

378

"Not allowing for additional stop codon at end of cDNA fragment... !");

379

// this might occur for CDS sequences where no features are marked

380

exons = new int[] { dna.getStart() + (codonStart - 1),

381

dna.getEnd() };

382

dnaToProteinMapping = new Mapping(product, exons,

383

new int[]

384

{ 1, translationLength }, 3, 1);

385

}

386

if ((translationLength + 1) * 3 == (1 - codonStart + dnaLength))

387

{

388

System.err.println(

389

"Allowing for additional stop codon at end of cDNA fragment... will probably cause an error in VAMSAs!");

390

exons = new int[] { dna.getStart() + (codonStart - 1),

391

dna.getEnd() - 3 };

392

dnaToProteinMapping = new Mapping(product, exons,

393

new int[]

394

{ 1, translationLength }, 3, 1);

}

}

else

{

// Trim the exon mapping if necessary - the given product may only be a

400

// fragment of a larger protein. (EMBL:AY043181 is an example)

if (isEmblCdna)

{

// TODO: Add a DbRef back to the parent EMBL sequence with the exon

405

// map

406

// if given a dataset reference, search dataset for parent EMBL

407

// sequence if it exists and set its map

408

// make a new feature annotating the coding contig

}

else

{

// final product length truncation check

413

int[] cdsRanges = adjustForProteinLength(translationLength,

414

exons);

415

dnaToProteinMapping = new Mapping(product, cdsRanges,

416

new int[]

417

{ 1, translationLength }, 3, 1);

if (product != null)

{

* make xref with mapping from protein to EMBL dna

422

423

DBRefEntry proteinToEmblRef = new DBRefEntry(DBRefSource.EMBL,

424

getSequenceVersion(), proteinId,

425

new Mapping(dnaToProteinMapping.getMap().getInverse()));

426

product.addDBRef(proteinToEmblRef);

427

428

429

* make xref from protein to EMBLCDS; we assume here that the

430

* CDS sequence version is same as dna sequence (?!)

431

432

MapList proteinToCdsMapList = new MapList(

433

new int[]

434

{ 1, translationLength },

435

new int[]

436

{ 1 + (codonStart - 1),

437

(codonStart - 1) + 3 * translationLength },

438

1, 3);

439

DBRefEntry proteinToEmblCdsRef = new DBRefEntry(

440

DBRefSource.EMBLCDS, getSequenceVersion(), proteinId,

441

new Mapping(proteinToCdsMapList));

442

product.addDBRef(proteinToEmblCdsRef);

443

444

445

* make 'direct' xref from protein to EMBLCDSPROTEIN

446

447

proteinToEmblProteinRef = new DBRefEntry(proteinToEmblCdsRef);

448

proteinToEmblProteinRef.setSource(DBRefSource.EMBLCDSProduct);

449

proteinToEmblProteinRef.setMap(null);

450

product.addDBRef(proteinToEmblProteinRef);

}

}

}

* add cds features to dna sequence

457

458

String cds = feature.getName(); // "CDS"

459

for (int xint = 0; exons != null && xint < exons.length - 1; xint += 2)

460

{

461

int exonStart = exons[xint];

462

int exonEnd = exons[xint + 1];

463

int begin = Math.min(exonStart, exonEnd);

464

int end = Math.max(exonStart, exonEnd);

465

int exonNumber = xint / 2 + 1;

466

String desc = String.format("Exon %d for protein '%s' EMBLCDS:%s",

467

exonNumber, proteinName, proteinId);

468

469

SequenceFeature sf = makeCdsFeature(cds, desc, begin, end,

470

sourceDb, vals);

471

472

sf.setEnaLocation(feature.getLocation());

473

boolean forwardStrand = exonStart <= exonEnd;

474

sf.setStrand(forwardStrand ? "+" : "-");

475

sf.setPhase(String.valueOf(codonStart - 1));

476

sf.setValue(FeatureProperties.EXONPOS, exonNumber);

477

sf.setValue(FeatureProperties.EXONPRODUCT, proteinName);

478

479

dna.addSequenceFeature(sf);

}

}

* add feature dbRefs to sequence, and mappings for Uniprot xrefs

485

486

boolean hasUniprotDbref = false;

487

if (feature.dbRefs != null)

488

{

489

boolean mappingUsed = false;

490

for (DBRefEntry ref : feature.dbRefs)

491

{

492

493

* ensure UniProtKB/Swiss-Prot converted to UNIPROT

494

495

String source = DBRefUtils.getCanonicalName(ref.getSource());

496

ref.setSource(source);

497

DBRefEntry proteinDbRef = new DBRefEntry(ref.getSource(),

498

ref.getVersion(), ref.getAccessionId());

499

if (source.equals(DBRefSource.UNIPROT))

500

{

501

String proteinSeqName = DBRefSource.UNIPROT + "|"

502

+ ref.getAccessionId();

503

if (dnaToProteinMapping != null

504

&& dnaToProteinMapping.getTo() != null)

{

if (mappingUsed)

{

* two or more Uniprot xrefs for the same CDS -

510

* each needs a distinct Mapping (as to a different sequence)

511

512

dnaToProteinMapping = new Mapping(dnaToProteinMapping);

}

mappingUsed = true;

* try to locate the protein mapped to (possibly by a

518

* previous CDS feature); if not found, construct it from

519

* the EMBL translation

520

521

SequenceI proteinSeq = matcher.findIdMatch(proteinSeqName);

522

if (proteinSeq == null)

523

{

524

proteinSeq = new Sequence(proteinSeqName,

525

product.getSequenceAsString());

526

matcher.add(proteinSeq);

527

peptides.add(proteinSeq);

528

}

529

dnaToProteinMapping.setTo(proteinSeq);

530

dnaToProteinMapping.setMappedFromId(proteinId);

531

proteinSeq.addDBRef(proteinDbRef);

532

ref.setMap(dnaToProteinMapping);

533

}

534

hasUniprotDbref = true;

}

if (product != null)

{

* copy feature dbref to our protein product

540

541

DBRefEntry pref = proteinDbRef;

542

pref.setMap(null); // reference is direct

543

product.addDBRef(pref);

544

// Add converse mapping reference

545

if (dnaToProteinMapping != null)

546

{

547

Mapping pmap = new Mapping(dna,

548

dnaToProteinMapping.getMap().getInverse());

549

pref = new DBRefEntry(sourceDb, getSequenceVersion(),

550

this.getAccession());

551

pref.setMap(pmap);

552

if (dnaToProteinMapping.getTo() != null)

553

{

554

dnaToProteinMapping.getTo().addDBRef(pref);

}

}

}

dna.addDBRef(ref);

}

}

* if we have a product (translation) but no explicit Uniprot dbref

564

* (example: EMBL AAFI02000057 protein_id EAL65544.1)

565

* then construct mappings to an assumed EMBLCDSPROTEIN accession

566

567

if (!hasUniprotDbref && product != null)

568

{

569

if (proteinToEmblProteinRef == null)

570

{

571

// assuming CDSPROTEIN sequence version = dna version (?!)

572

proteinToEmblProteinRef = new DBRefEntry(DBRefSource.EMBLCDSProduct,

573

getSequenceVersion(), proteinId);

574

}

575

product.addDBRef(proteinToEmblProteinRef);

576

577

if (dnaToProteinMapping != null

578

&& dnaToProteinMapping.getTo() != null)

579

{

580

DBRefEntry dnaToEmblProteinRef = new DBRefEntry(

581

DBRefSource.EMBLCDSProduct, getSequenceVersion(),

582

proteinId);

583

dnaToEmblProteinRef.setMap(dnaToProteinMapping);

584

dnaToProteinMapping.setMappedFromId(proteinId);

585

dna.addDBRef(dnaToEmblProteinRef);

}

}

}

/**

* Helper method to construct a SequenceFeature for one cds range

592

593

* @param type

594

* feature type ("CDS")

* @param desc

* description

* @param begin

* start position

* @param end

* end position

* @param group

* feature group

* @param vals

* map of 'miscellaneous values' for feature

605

* @return

606

607

protected SequenceFeature makeCdsFeature(String type, String desc,

608

int begin, int end, String group, Map<String, String> vals)

609

{

610

SequenceFeature sf = new SequenceFeature(type, desc, begin, end, group);

611

if (!vals.isEmpty())

612

{

613

StringBuilder sb = new StringBuilder();

614

boolean first = true;

615

for (Entry<String, String> val : vals.entrySet())

{

if (!first)

{

sb.append(";");

}

sb.append(val.getKey()).append("=").append(val.getValue());

622

first = false;

623

sf.setValue(val.getKey(), val.getValue());

624

}

625

sf.setAttributes(sb.toString());

}

return sf;

}

/**

* Returns the CDS positions as a single array of [start, end, start, end...]

632

* positions. If on the reverse strand, these will be in descending order.

* @param feature

* @return

protected int[] getCdsRanges(EmblFeature feature)

638

{

639

if (feature.location == null)

{

return new int[] {};

}

try

{

List<int[]> ranges = DnaUtils.parseLocation(feature.location);

647

return listToArray(ranges);

648

} catch (ParseException e)

649

{

650

Cache.log.warn(

651

String.format("Not parsing inexact CDS location %s in ENA %s",

652

feature.location, this.accession));

return new int[] {};

}

}

/**

* Converts a list of [start, end] ranges to a single array of [start, end,

* start, end ...]

* @param ranges

* @return

int[] listToArray(List<int[]> ranges)

665

{

666

int[] result = new int[ranges.size() * 2];

667

int i = 0;

668

for (int[] range : ranges)

669

{

670

result[i++] = range[0];

671

result[i++] = range[1];

}

return result;

}

/**

* Truncates (if necessary) the exon intervals to match 3 times the length of

678

* the protein; also accepts 3 bases longer (for stop codon not included in

679

* protein)

680

681

* @param proteinLength

682

* @param exon

683

* an array of [start, end, start, end...] intervals

684

* @return the same array (if unchanged) or a truncated copy

685

686

static int[] adjustForProteinLength(int proteinLength, int[] exon)

687

{

688

if (proteinLength <= 0 || exon == null)

{

return exon;

}

int expectedCdsLength = proteinLength * 3;

693

int exonLength = MappingUtils.getLength(Arrays.asList(exon));

694

695

696

* if exon length matches protein, or is shorter, or longer by the

697

* length of a stop codon (3 bases), then leave it unchanged

698

699

if (expectedCdsLength >= exonLength

700

|| expectedCdsLength == exonLength - 3)

{

return exon;

}

int origxon[];

int sxpos = -1;

int endxon = 0;

origxon = new int[exon.length];

709

System.arraycopy(exon, 0, origxon, 0, exon.length);

710

int cdspos = 0;

711

for (int x = 0; x < exon.length; x += 2)

712

{

713

cdspos += Math.abs(exon[x + 1] - exon[x]) + 1;

714

if (expectedCdsLength <= cdspos)

715

{

716

// advanced beyond last codon.

717

sxpos = x;

718

if (expectedCdsLength != cdspos)

719

{

720

// System.err

721

// .println("Truncating final exon interval on region by "

722

// + (cdspos - cdslength));

}

* shrink the final exon - reduce end position if forward

727

* strand, increase it if reverse

728

729

if (exon[x + 1] >= exon[x])

730

{

731

endxon = exon[x + 1] - cdspos + expectedCdsLength;

}

else

{

endxon = exon[x + 1] + cdspos - expectedCdsLength;

}

break;

}

}

if (sxpos != -1)

{

// and trim the exon interval set if necessary

744

int[] nxon = new int[sxpos + 2];

745

System.arraycopy(exon, 0, nxon, 0, sxpos + 2);

746

nxon[sxpos + 1] = endxon; // update the end boundary for the new exon

// set

exon = nxon;

}

return exon;

}

public String getSequenceVersion()

754

{

755

return sequenceVersion;

756

}

757

758

public void setSequenceVersion(String sequenceVersion)

759

{

760

this.sequenceVersion = sequenceVersion;

761

}

762

763

public String getSequenceLength()

764

{

765

return sequenceLength;

766

}

767

768

public void setSequenceLength(String sequenceLength)

769

{

770

this.sequenceLength = sequenceLength;

771

}

772

773

public String getEntryVersion()

{

return entryVersion;

}

public void setEntryVersion(String entryVersion)

779

{

780

this.entryVersion = entryVersion;

781

}

782

783

public String getMoleculeType()

{

return moleculeType;

}

public void setMoleculeType(String moleculeType)

789

{

790

this.moleculeType = moleculeType;

791

}

792

793

public String getTopology()

{

return topology;

}

public void setTopology(String topology)

799

{

800

this.topology = topology;

801

}

802

803

public String getTaxonomicDivision()

804

{

805

return taxonomicDivision;

806

}

807

808

public void setTaxonomicDivision(String taxonomicDivision)

809

{

810

this.taxonomicDivision = taxonomicDivision;

811

}

812

813

public String getDescription()

{

return description;

}

public void setDescription(String description)

819

{

820

this.description = description;

821

}

822

823

public String getFirstPublicDate()

824

{

825

return firstPublicDate;

826

}

827

828

public void setFirstPublicDate(String firstPublicDate)

829

{

830

this.firstPublicDate = firstPublicDate;

831

}

832

833

public String getFirstPublicRelease()

834

{

835

return firstPublicRelease;

836

}

837

838

public void setFirstPublicRelease(String firstPublicRelease)

839

{

840

this.firstPublicRelease = firstPublicRelease;

841

}

842

843

public String getLastUpdatedDate()

844

{

845

return lastUpdatedDate;

846

}

847

848

public void setLastUpdatedDate(String lastUpdatedDate)

849

{

850

this.lastUpdatedDate = lastUpdatedDate;

851

}

852

853

public String getLastUpdatedRelease()

854

{

855

return lastUpdatedRelease;

856

}

857

858

public void setLastUpdatedRelease(String lastUpdatedRelease)

859

{

860

this.lastUpdatedRelease = lastUpdatedRelease;

861

}

862

863

public String getDataClass()

{

return dataClass;

}

public void setDataClass(String dataClass)

869

{

870

this.dataClass = dataClass;

871

}

872

}

jalviewX

File EmblEntry.java

Coverage histogram

Code metrics

Classes

Class EmblEntry

Contributing tests

Contributing tests

Source view