File FeaturesFile.java

Branches:

188

Statements:

363

Methods:

Classes:

LOC:

1,291

NCLOC:

798

Total complexity:

145

Complexity density:

0.4

Statements/Method:

14.52

Methods/Class:

Average method complexity:

5.8

Classes

Class	Line #	Total Statements	Complexity	Uncovered Elements	TOTAL Coverage	Actions
FeaturesFile	71	363	145	129	0.776041777.6%

Class FeaturesFile

Class FeaturesFile	Line # 71	Total Statements 363	Complexity 145	Uncovered Elements 129	TOTAL Coverage 0.776041777.6%
FeaturesFile() FeaturesFile()	100100	0.00	1.01	0.00	-1.0 -1.0 -
FeaturesFile(Object,DataSourceType) FeaturesFile(Object,DataSourceType)	111111	1.01	1.01	0.00	1.0 1.0100%
FeaturesFile(FileParse) FeaturesFile(FileParse)	121121	1.01	1.01	0.00	1.0 1.0100%
FeaturesFile(boolean,Object,DataSourceType) FeaturesFile(boolean,Object,DataSourceType)	134134	1.01	1.01	0.00	1.0 1.0100%
parse(AlignmentI,Map<String, FeatureColourI>,boolean) : boolean parse(AlignmentI,Map<String, FeatureColourI>,boolean) : boolean	152152	1.01	1.01	0.00	1.0 1.0100%
addProperties(AlignmentI) : void addProperties(AlignmentI) : void	162162	5.05	4.04	2.02	0.7777778 0.777777877.8%
parse(AlignmentI,Map<String, FeatureColourI>,boolean,boolean) : boolean parse(AlignmentI,Map<String, FeatureColourI>,boolean,boolean) : boolean	189189	1.01	1.01	0.00	1.0 1.0100%
parse(AlignmentI,Map<String, FeatureColourI>,Map<String, FeatureMatcherSetI>,boolean,boolean) : boolean parse(AlignmentI,Map<String, FeatureColourI>,Map<String, FeatureMatcherSetI>,boolean,boolean) : boolean	211211	43.043	17.017	10.010	0.85507244 0.8550724485.5%
parseFilters(Map<String, FeatureMatcherSetI>) : void parseFilters(Map<String, FeatureMatcherSetI>) : void	339339	11.011	6.06	4.04	0.7894737 0.789473778.9%
parseJalviewFeature(String,String[],AlignmentI,Map<String, FeatureColourI>,boolean,boolean,String) : boolean parseJalviewFeature(String,String[],AlignmentI,Map<String, FeatureColourI>,boolean,boolean,String) : boolean	380380	37.037	10.010	11.011	0.7755102 0.775510277.6%
resetMatcher() : void resetMatcher() : void	471471	2.02	1.01	0.00	1.0 1.0100%
findSequence(String,AlignmentI,List<SequenceI>,boolean) : SequenceI findSequence(String,AlignmentI,List<SequenceI>,boolean) : SequenceI	497497	19.019	10.010	2.02	0.93939394 0.9393939493.9%
parseDescriptionHTML(SequenceFeature,boolean) : void parseDescriptionHTML(SequenceFeature,boolean) : void	544544	7.07	3.03	2.02	0.8181818 0.818181881.8%
printJalviewFormat(SequenceI[],Map<String, FeatureColourI>,Map<String, FeatureMatcherSetI>,List<String>,boolean) : String printJalviewFormat(SequenceI[],Map<String, FeatureColourI>,Map<String, FeatureMatcherSetI>,List<String>,boolean) : String	581581	24.024	9.09	3.03	0.9166667 0.916666791.7%
outputFeatureFilters(StringBuilder,Map<String, FeatureColourI>,Map<String, FeatureMatcherSetI>) : void outputFeatureFilters(StringBuilder,Map<String, FeatureColourI>,Map<String, FeatureMatcherSetI>) : void	661661	12.012	7.07	0.00	1.0 1.0100%
outputFeaturesByGroup(StringBuilder,List<String>,String[],SequenceI[]) : boolean outputFeaturesByGroup(StringBuilder,List<String>,String[],SequenceI[]) : boolean	704704	21.021	5.05	0.00	1.0 1.0100%
formatJalviewFeature(String,SequenceFeature) : String formatJalviewFeature(String,SequenceFeature) : String	754754	28.028	10.010	8.08	0.8095238 0.809523881%
parse() : void parse() : void	814814	12.012	6.06	11.011	0.5 0.550%
print(SequenceI[],boolean) : String print(SequenceI[],boolean) : String	856856	2.02	1.01	2.02	0.0 0.00%
printGffFormat(SequenceI[],Map<String, FeatureColourI>,List<String>,boolean) : String printGffFormat(SequenceI[],Map<String, FeatureColourI>,List<String>,boolean) : String	874874	39.039	17.017	3.03	0.9508197 0.950819795.1%
constructCodonMappingFromAlign(List<String>,boolean,int) : MapList constructCodonMappingFromAlign(List<String>,boolean,int) : MapList	967967	29.029	5.05	35.035	0.0 0.00%
parseGff(String[],AlignmentI,boolean,List<SequenceI>) : SequenceI parseGff(String[],AlignmentI,boolean,List<SequenceI>) : SequenceI	10511051	17.017	6.06	9.09	0.64 0.6464%
processGffColumnNine(String,SequenceFeature) : void processGffColumnNine(String,SequenceFeature) : void	11061106	8.08	3.03	12.012	0.0 0.00%
processAsFasta(AlignmentI,List<SequenceI>) : void processAsFasta(AlignmentI,List<SequenceI>) : void	11391139	23.023	7.07	3.03	0.9032258 0.903225890.3%
processGffPragma(String,Map<String, String>,AlignmentI,List<SequenceI>) : void processGffPragma(String,Map<String, String>,AlignmentI,List<SequenceI>) : void	12311231	19.019	12.012	9.09	0.7692308 0.769230876.9%

Contributing tests

This file is covered by 14 tests.

Contributing tests

Test contribution	Test	Result
0.3402778	jalview.io.FeaturesFileTest.testPrintJalviewFormatjalview.io.FeaturesFileTest.testPrintJalviewFormat	1PASS
0.265625	jalview.io.FeaturesFileTest.simpleGff3FileLoaderjalview.io.FeaturesFileTest.simpleGff3FileLoader	1PASS
0.265625	jalview.io.FeaturesFileTest.readGff3Filejalview.io.FeaturesFileTest.readGff3File	1PASS
0.23090278	jalview.io.FeaturesFileTest.simpleGff3RelaxedIdMatchingjalview.io.FeaturesFileTest.simpleGff3RelaxedIdMatching	1PASS
0.22395833	jalview.io.FeaturesFileTest.simpleGff3FileClassjalview.io.FeaturesFileTest.simpleGff3FileClass	1PASS
0.20833333	jalview.io.FeaturesFileTest.testParsejalview.io.FeaturesFileTest.testParse	1PASS
0.171875	jalview.io.gff.ExonerateHelperTest.testAddExonerateGffToAlignmentjalview.io.gff.ExonerateHelperTest.testAddExonerateGffToAlignment	1PASS
0.140625	jalview.io.FeaturesFileTest.testParse_jalviewFeaturesOnlyjalview.io.FeaturesFileTest.testParse_jalviewFeaturesOnly	1PASS
0.13020833	jalview.io.FeaturesFileTest.testParse_mixedJalviewGffjalview.io.FeaturesFileTest.testParse_mixedJalviewGff	1PASS
0.13020833	jalview.io.FeaturesFileTest.testParse_pureGff3jalview.io.FeaturesFileTest.testParse_pureGff3	1PASS
0.12326389	jalview.io.gff.GffTests.testResolveExonerateGffjalview.io.gff.GffTests.testResolveExonerateGff	1PASS
0.104166664	jalview.io.FeaturesFileTest.testPrintGffFormatjalview.io.FeaturesFileTest.testPrintGffFormat	1PASS
0.038194444	jalview.io.FeaturesFileTest.testOutputFeatureFiltersjalview.io.FeaturesFileTest.testOutputFeatureFilters	1PASS
0.026041666	jalview.io.FeaturesFileTest.testParseFiltersjalview.io.FeaturesFileTest.testParseFilters	1PASS

Source view

/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */

package jalview.io;

import jalview.analysis.AlignmentUtils;

import jalview.analysis.SequenceIdMatcher;

import jalview.api.AlignViewportI;

import jalview.api.FeatureColourI;

import jalview.api.FeaturesSourceI;

import jalview.datamodel.AlignedCodonFrame;

import jalview.datamodel.Alignment;

import jalview.datamodel.AlignmentI;

import jalview.datamodel.SequenceDummy;

import jalview.datamodel.SequenceFeature;

import jalview.datamodel.SequenceI;

import jalview.datamodel.features.FeatureMatcherSet;

import jalview.datamodel.features.FeatureMatcherSetI;

import jalview.io.gff.GffHelperBase;

import jalview.io.gff.GffHelperFactory;

import jalview.io.gff.GffHelperI;

import jalview.schemes.FeatureColour;

import jalview.util.ColorUtils;

import jalview.util.MapList;

import jalview.util.ParseHtmlBodyAndLinks;

import jalview.util.StringUtils;

import java.awt.Color;

import java.io.IOException;

import java.util.ArrayList;

import java.util.Arrays;

import java.util.Collections;

import java.util.HashMap;

import java.util.List;

import java.util.Map;

import java.util.Map.Entry;

/**
 * Parses and writes features files, which may be in Jalview, GFF2 or GFF3
 * format. These are tab-delimited formats but with differences in the use of
 * columns.
 *
 * A Jalview feature file may define feature colours and then declare that the
 * remainder of the file is in GFF format with the line 'GFF'.
 *
 * GFF3 files may include alignment mappings for features, which Jalview will
 * attempt to model, and may include sequence data following a ##FASTA line.
 *
 * @author AMW
 * @author jbprocter
 * @author gmcarstairs
 */

public class FeaturesFile extends AlignFile implements FeaturesSourceI

{

// Regular expression used to split an input line into tab-delimited columns
private static final String TAB_REGEX = "\\t";

// First-column keyword that opens a named feature group (group name in column 2)
private static final String STARTGROUP = "STARTGROUP";

// First-column keyword that closes the current feature group
private static final String ENDGROUP = "ENDGROUP";

// Keyword that opens a block of feature filter definitions
private static final String STARTFILTERS = "STARTFILTERS";

// Keyword that closes a block of feature filter definitions
private static final String ENDFILTERS = "ENDFILTERS";

// Sequence id placeholder in a Jalview feature line: when present, the
// sequence is located by the index in column 3 instead of by name
private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED";

// NOTE(review): not referenced in the part of the file reviewed; presumably
// the GFF 'Note' attribute key - confirm against the GFF parsing code
private static final String NOTE = "Note";

// Pragma written (followed by the version number) as the first line of
// printGffFormat output
protected static final String GFF_VERSION = "##gff-version";

// The alignment for which 'matcher' was last built in findSequence; cleared
// by resetMatcher
private AlignmentI lastmatchedAl = null;

// Cached id matcher used for relaxed sequence id matching in findSequence;
// cleared by resetMatcher
private SequenceIdMatcher matcher = null;

// Alignment that parse() adds features to; its codon frames (if any) are
// copied to the target alignment in addProperties
protected AlignmentI dataset;

// 0 while input lines are treated as Jalview format; set to 2 when a 'GFF'
// line is read in parse. NOTE(review): may also be set by GFF pragma
// processing elsewhere in the file - confirm
protected int gffVersion;

/**

* Creates a new FeaturesFile object.

100

public FeaturesFile()

{

}

/**

* Constructor which does not parse the file immediately

106

107

* @param file File or String filename

108

* @param paste

109

* @throws IOException

110

111

public FeaturesFile(Object file, DataSourceType paste)

112

throws IOException

113

{

114

super(false, file, paste);

}

/**

* @param source

* @throws IOException

120

121

public FeaturesFile(FileParse source) throws IOException

{

super(source);

}

/**

* Constructor that optionally parses the file immediately

128

129

* @param parseImmediately

130

* @param file

131

* @param type

132

* @throws IOException

133

134

public FeaturesFile(boolean parseImmediately, Object file,

135

DataSourceType type) throws IOException

136

{

137

super(parseImmediately, file, type);

}

/**

* Parse GFF or sequence features file using case-independent matching,

* discarding URLs

* @param align

* - alignment/dataset containing sequences that are to be annotated

146

* @param colours

147

* - hashtable to store feature colour definitions

148

* @param removeHTML

149

* - process html strings into plain text

150

* @return true if features were added

151

152

public boolean parse(AlignmentI align,

153

Map<String, FeatureColourI> colours, boolean removeHTML)

154

{

155

return parse(align, colours, removeHTML, false);

}

/**

* Extends the default addProperties by also adding peptide-to-cDNA mappings

160

* (if any) derived while parsing a GFF file

161

162

@Override

163

public void addProperties(AlignmentI al)

164

{

165

super.addProperties(al);

166

if (dataset != null && dataset.getCodonFrames() != null)

167

{

168

AlignmentI ds = (al.getDataset() == null) ? al : al.getDataset();

169

for (AlignedCodonFrame codons : dataset.getCodonFrames())

170

{

171

ds.addCodonFrame(codons);

}

}

}

/**

* Parse GFF or Jalview format sequence features file

178

179

* @param align

180

* - alignment/dataset containing sequences that are to be annotated

181

* @param colours

182

* - map to store feature colour definitions

183

* @param removeHTML

184

* - process html strings into plain text

185

* @param relaxedIdmatching

186

* - when true, ID matches to compound sequence IDs are allowed

187

* @return true if features were added

188

189

public boolean parse(AlignmentI align,

190

Map<String, FeatureColourI> colours, boolean removeHTML,

191

boolean relaxedIdmatching)

192

{

193

return parse(align, colours, null, removeHTML, relaxedIdmatching);

}

/**

* Parse GFF or Jalview format sequence features file

198

199

* @param align

200

* - alignment/dataset containing sequences that are to be annotated

201

* @param colours

202

* - map to store feature colour definitions

203

* @param filters

204

* - map to store feature filter definitions

205

* @param removeHTML

206

* - process html strings into plain text

207

* @param relaxedIdmatching

208

* - when true, ID matches to compound sequence IDs are allowed

209

* @return true if features were added

210

211

public boolean parse(AlignmentI align,

212

Map<String, FeatureColourI> colours,

213

Map<String, FeatureMatcherSetI> filters, boolean removeHTML,

214

boolean relaxedIdmatching)

215

{

216

Map<String, String> gffProps = new HashMap<>();

217

218

* keep track of any sequences we try to create from the data

219

220

List<SequenceI> newseqs = new ArrayList<>();

String line = null;

try

{

String[] gffColumns;

String featureGroup = null;

227

228

while ((line = nextLine()) != null)

229

{

230

// skip comments/process pragmas

231

494

if (line.length() == 0 || line.startsWith("#"))

232

{

233

if (line.toLowerCase().startsWith("##"))

234

{

235

processGffPragma(line, gffProps, align, newseqs);

}

continue;

}

399

gffColumns = line.split(TAB_REGEX);

241

399

if (gffColumns.length == 1)

242

{

243

if (line.trim().equalsIgnoreCase("GFF"))

244

{

245

246

* Jalview features file with appended GFF

247

* assume GFF2 (though it may declare ##gff-version 3)

gffVersion = 2;

continue;

}

}

398

if (gffColumns.length > 0 && gffColumns.length < 4)

255

{

256

257

* if 2 or 3 tokens, we anticipate either 'startgroup', 'endgroup' or

258

* a feature type colour specification

259

260

String ft = gffColumns[0];

261

if (ft.equalsIgnoreCase(STARTFILTERS))

262

{

263

parseFilters(filters);

264

continue;

265

}

266

if (ft.equalsIgnoreCase(STARTGROUP))

267

{

268

featureGroup = gffColumns[1];

269

}

270

else if (ft.equalsIgnoreCase(ENDGROUP))

271

{

272

// We should check whether this is the current group,

273

// but at present there's no way of showing more than 1 group

featureGroup = null;

}

else

{

String colscheme = gffColumns[1];

279

FeatureColourI colour = FeatureColour

280

.parseJalviewFeatureColour(colscheme);

281

if (colour != null)

282

{

283

colours.put(ft, colour);

}

}

continue;

}

* if not a comment, GFF pragma, startgroup, endgroup or feature

291

* colour specification, that just leaves a feature details line

292

* in either Jalview or GFF format

293

294

356

if (gffVersion == 0)

295

{

296

330

parseJalviewFeature(line, gffColumns, align, colours, removeHTML,

297

relaxedIdmatching, featureGroup);

}

else

{

parseGff(gffColumns, align, relaxedIdmatching, newseqs);

}

}

resetMatcher();

} catch (Exception ex)

306

{

307

// should report somewhere useful for UI if necessary

308

warningMessage = ((warningMessage == null) ? "" : warningMessage)

309

+ "Parsing error at\n" + line;

310

System.out.println("Error parsing feature file: " + ex + "\n" + line);

311

ex.printStackTrace(System.err);

resetMatcher();

return false;

}

* experimental - add any dummy sequences with features to the alignment

318

* - we need them for Ensembl feature extraction - though maybe not otherwise

319

320

for (SequenceI newseq : newseqs)

321

{

322

if (newseq.getFeatures().hasFeatures())

323

{

324

align.addSequence(newseq);

}

}

return true;

}

/**

* Reads input lines from STARTFILTERS to ENDFILTERS and adds a feature type

332

* filter to the map for each line parsed. After exit from this method,

333

* nextLine() should return the line after ENDFILTERS (or we are already at

334

* end of file if ENDFILTERS was missing).

335

336

* @param filters

337

* @throws IOException

338

339

protected void parseFilters(Map<String, FeatureMatcherSetI> filters)

throws IOException

{

String line;

while ((line = nextLine()) != null)

344

{

345

if (line.toUpperCase().startsWith(ENDFILTERS))

{

return;

}

String[] tokens = line.split(TAB_REGEX);

350

if (tokens.length != 2)

351

{

352

System.err.println(String.format("Invalid token count %d for %d",

353

tokens.length, line));

}

else

{

String featureType = tokens[0];

358

FeatureMatcherSetI fm = FeatureMatcherSet.fromString(tokens[1]);

359

if (fm != null && filters != null)

360

{

361

filters.put(featureType, fm);

}

}

}

}

/**

* Try to parse a Jalview format feature specification and add it as a

369

* sequence feature to any matching sequences in the alignment. Returns true

370

* if successful (a feature was added), or false if not.

* @param line

* @param gffColumns

* @param alignment

* @param featureColours

376

* @param removeHTML

377

* @param relaxedIdmatching

378

* @param featureGroup

379

380

330

protected boolean parseJalviewFeature(String line, String[] gffColumns,

381

AlignmentI alignment, Map<String, FeatureColourI> featureColours,

382

boolean removeHTML, boolean relaxedIdMatching,

String featureGroup)

{

* tokens: description seqid seqIndex start end type [score]

387

388

330

if (gffColumns.length < 6)

389

{

390

System.err.println("Ignoring feature line '" + line

391

+ "' with too few columns (" + gffColumns.length + ")");

392

return false;

393

}

394

330

String desc = gffColumns[0];

395

330

String seqId = gffColumns[1];

396

330

SequenceI seq = findSequence(seqId, alignment, null, relaxedIdMatching);

397

398

330

if (!ID_NOT_SPECIFIED.equals(seqId))

399

{

400

329

seq = findSequence(seqId, alignment, null, relaxedIdMatching);

}

else

{

seqId = null;

seq = null;

String seqIndex = gffColumns[2];

407

try

408

{

409

int idx = Integer.parseInt(seqIndex);

410

seq = alignment.getSequenceAt(idx);

411

} catch (NumberFormatException ex)

412

{

413

System.err.println("Invalid sequence index: " + seqIndex);

}

}

330

if (seq == null)

418

{

419

System.out.println("Sequence not found: " + line);

return false;

}

330

int startPos = Integer.parseInt(gffColumns[3]);

424

330

int endPos = Integer.parseInt(gffColumns[4]);

425

426

330

String ft = gffColumns[5];

427

428

330

if (!featureColours.containsKey(ft))

429

{

430

431

* Perhaps an old style groups file with no colours -

432

* synthesize a colour from the feature type

433

434

Color colour = ColorUtils.createColourFromName(ft);

435

featureColours.put(ft, new FeatureColour(colour));

436

}

437

330

SequenceFeature sf = null;

438

330

if (gffColumns.length > 6)

439

{

440

float score = Float.NaN;

441

try

442

{

443

score = new Float(gffColumns[6]).floatValue();

444

} catch (NumberFormatException ex)

445

{

446

sf = new SequenceFeature(ft, desc, startPos, endPos, featureGroup);

447

}

448

sf = new SequenceFeature(ft, desc, startPos, endPos, score,

featureGroup);

}

else

{

282

sf = new SequenceFeature(ft, desc, startPos, endPos, featureGroup);

454

}

455

456

330

parseDescriptionHTML(sf, removeHTML);

457

458

330

seq.addSequenceFeature(sf);

459

460

while (seqId != null

461

&& (seq = alignment.findName(seq, seqId, false)) != null)

462

{

463

seq.addSequenceFeature(new SequenceFeature(sf));

464

}

465

330

return true;

}

/**

* clear any temporary handles used to speed up ID matching

470

471

protected void resetMatcher()

472

{

473

lastmatchedAl = null;

matcher = null;

}

/**

* Returns a sequence matching the given id, as follows

479

* <ul>

480

* <li>strict matching is on exact sequence name</li>

481

* <li>relaxed matching allows matching on a token within the sequence name,

482

* or a dbxref</li>

483

* <li>first tries to find a match in the alignment sequences</li>

484

* <li>else tries to find a match in the new sequences already generated while

485

* parsing the features file</li>

486

* <li>else creates a new placeholder sequence, adds it to the new sequences

487

* list, and returns it</li>

* </ul>

* @param seqId

* @param align

* @param newseqs

* @param relaxedIdMatching

* @return

685

protected SequenceI findSequence(String seqId, AlignmentI align,

498

List<SequenceI> newseqs, boolean relaxedIdMatching)

499

{

500

// TODO encapsulate in SequenceIdMatcher, share the matcher

501

// with the GffHelper (removing code duplication)

502

685

SequenceI match = null;

503

685

if (relaxedIdMatching)

504

{

505

if (lastmatchedAl != align)

506

{

507

lastmatchedAl = align;

508

matcher = new SequenceIdMatcher(align.getSequencesArray());

509

if (newseqs != null)

510

{

511

matcher.addAll(newseqs);

512

}

513

}

514

match = matcher.findIdMatch(seqId);

}

else

{

673

match = align.findName(seqId, true);

519

673

if (match == null && newseqs != null)

520

{

521

for (SequenceI m : newseqs)

522

{

523

if (seqId.equals(m.getName()))

{

return m;

}

}

}

}

678

if (match == null && newseqs != null)

532

{

533

match = new SequenceDummy(seqId);

534

if (relaxedIdMatching)

535

{

536

matcher.addAll(Arrays.asList(new SequenceI[] { match }));

537

}

538

// add dummy sequence to the newseqs list

539

newseqs.add(match);

540

}

541

678

return match;

542

}

543

544

330

public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)

545

{

546

330

if (sf.getDescription() == null)

{

return;

}

330

ParseHtmlBodyAndLinks parsed = new ParseHtmlBodyAndLinks(

551

sf.getDescription(), removeHTML, newline);

552

553

330

if (removeHTML)

554

{

555

326

sf.setDescription(parsed.getNonHtmlContent());

556

}

557

558

330

for (String link : parsed.getLinks())

559

{

560

106

sf.addLink(link);

}

}

/**

* Returns contents of a Jalview format features file, for visible features, as

566

* filtered by type and group. Features with a null group are displayed if their

567

* feature type is visible. Non-positional features may optionally be included

568

* (with no check on type or group).

* @param sequences

* source of features

* @param visible

* map of colour for each visible feature type

574

* @param featureFilters

575

* @param visibleFeatureGroups

576

* @param includeNonPositional

577

* if true, include non-positional features (regardless of group or

* type)

* @return

public String printJalviewFormat(SequenceI[] sequences,

582

Map<String, FeatureColourI> visible,

583

Map<String, FeatureMatcherSetI> featureFilters,

584

List<String> visibleFeatureGroups, boolean includeNonPositional)

585

{

586

if (!includeNonPositional && (visible == null || visible.isEmpty()))

587

{

588

// no point continuing.

589

return "No Features Visible";

}

* write out feature colours (if we know them)

594

595

// TODO: decide if feature links should also be written here ?

596

StringBuilder out = new StringBuilder(256);

597

if (visible != null)

598

{

599

for (Entry<String, FeatureColourI> featureColour : visible.entrySet())

600

{

601

FeatureColourI colour = featureColour.getValue();

602

out.append(colour.toJalviewFormat(featureColour.getKey())).append(

newline);

}

}

String[] types = visible == null ? new String[0] : visible.keySet()

608

.toArray(new String[visible.keySet().size()]);

609

610

611

* feature filters if any

612

613

outputFeatureFilters(out, visible, featureFilters);

614

615

616

* sort groups alphabetically, and ensure that features with a

617

* null or empty group are output after those in named groups

618

619

List<String> sortedGroups = new ArrayList<>(visibleFeatureGroups);

620

sortedGroups.remove(null);

621

sortedGroups.remove("");

622

Collections.sort(sortedGroups);

623

sortedGroups.add(null);

624

sortedGroups.add("");

625

626

boolean foundSome = false;

627

628

629

* first output any non-positional features

630

631

if (includeNonPositional)

632

{

633

for (int i = 0; i < sequences.length; i++)

634

{

635

String sequenceName = sequences[i].getName();

636

for (SequenceFeature feature : sequences[i].getFeatures()

637

.getNonPositionalFeatures())

638

{

639

foundSome = true;

640

out.append(formatJalviewFeature(sequenceName, feature));

}

}

}

* positional features within groups

647

648

foundSome |= outputFeaturesByGroup(out, sortedGroups, types, sequences);

649

650

return foundSome ? out.toString() : "No Features Visible";

}

/**

* Outputs any feature filters defined for visible feature types, sandwiched by

655

* STARTFILTERS and ENDFILTERS lines

* @param out

* @param visible

* @param featureFilters

660

661

void outputFeatureFilters(StringBuilder out,

662

Map<String, FeatureColourI> visible,

663

Map<String, FeatureMatcherSetI> featureFilters)

664

{

665

if (visible == null || featureFilters == null

666

|| featureFilters.isEmpty())

{

return;

}

boolean first = true;

672

for (String featureType : visible.keySet())

673

{

674

FeatureMatcherSetI filter = featureFilters.get(featureType);

if (filter != null)

{

if (first)

{

first = false;

out.append(newline).append(STARTFILTERS).append(newline);

681

}

682

out.append(featureType).append(TAB).append(filter.toStableString())

.append(newline);

}

}

if (!first)

{

out.append(ENDFILTERS).append(newline).append(newline);

}

}

/**

* Appends output of sequence features within feature groups to the output

695

* buffer. Groups other than the null or empty group are sandwiched by

696

* STARTGROUP and ENDGROUP lines.

* @param out

* @param groups

* @param featureTypes

* @param sequences

* @return

private boolean outputFeaturesByGroup(StringBuilder out,

705

List<String> groups, String[] featureTypes, SequenceI[] sequences)

706

{

707

boolean foundSome = false;

708

for (String group : groups)

709

{

710

boolean isNamedGroup = (group != null && !"".equals(group));

if (isNamedGroup)

{

out.append(newline);

out.append(STARTGROUP).append(TAB);

out.append(group);

out.append(newline);

}

* output positional features within groups

721

722

144

for (int i = 0; i < sequences.length; i++)

723

{

724

135

String sequenceName = sequences[i].getName();

725

135

List<SequenceFeature> features = new ArrayList<>();

726

135

if (featureTypes.length > 0)

727

{

728

features.addAll(sequences[i].getFeatures().getFeaturesForGroup(

729

true, group, featureTypes));

730

}

731

732

135

for (SequenceFeature sequenceFeature : features)

733

{

734

foundSome = true;

735

out.append(formatJalviewFeature(sequenceName, sequenceFeature));

}

}

if (isNamedGroup)

{

out.append(ENDGROUP).append(TAB);

out.append(group);

out.append(newline);

}

}

return foundSome;

}

/**

* @param out

* @param sequenceName

752

* @param sequenceFeature

753

754

protected String formatJalviewFeature(

755

String sequenceName, SequenceFeature sequenceFeature)

756

{

757

StringBuilder out = new StringBuilder(64);

758

if (sequenceFeature.description == null

759

|| sequenceFeature.description.equals(""))

760

{

761

out.append(sequenceFeature.type).append(TAB);

}

else

{

if (sequenceFeature.links != null

766

&& sequenceFeature.getDescription().indexOf("<html>") == -1)

767

{

768

out.append("<html>");

769

}

770

771

out.append(sequenceFeature.description);

772

if (sequenceFeature.links != null)

773

{

774

for (int l = 0; l < sequenceFeature.links.size(); l++)

775

{

776

String label = sequenceFeature.links.elementAt(l);

777

String href = label.substring(label.indexOf("|") + 1);

778

label = label.substring(0, label.indexOf("|"));

779

780

if (sequenceFeature.description.indexOf(href) == -1)

781

{

782

out.append(" <a href=\"" + href + "\">" + label + "</a>");

}

}

if (sequenceFeature.getDescription().indexOf("</html>") == -1)

787

{

788

out.append("</html>");

}

}

out.append(TAB);

}

out.append(sequenceName);

795

out.append("\t-1\t");

796

out.append(sequenceFeature.begin);

797

out.append(TAB);

798

out.append(sequenceFeature.end);

799

out.append(TAB);

800

out.append(sequenceFeature.type);

801

if (!Float.isNaN(sequenceFeature.score))

802

{

803

out.append(TAB);

804

out.append(sequenceFeature.score);

}

out.append(newline);

return out.toString();

}

/**

* Parse method that is called when a GFF file is dragged to the desktop

@Override

public void parse()

{

AlignViewportI av = getViewport();

818

if (av != null)

819

{

820

if (av.getAlignment() != null)

821

{

822

dataset = av.getAlignment().getDataset();

}

if (dataset == null)

{

// working in the applet context ?

827

dataset = av.getAlignment();

}

}

else

{

dataset = new Alignment(new SequenceI[] {});

833

}

834

835

Map<String, FeatureColourI> featureColours = new HashMap<>();

836

boolean parseResult = parse(dataset, featureColours, false, true);

837

if (!parseResult)

838

{

839

// pass error up somehow

}

if (av != null)

{

// update viewport with the dataset data ?

}

else

{

setSeqs(dataset.getSequencesArray());

}

}

/**

* Implementation of unused abstract method

853

854

* @return error message

855

856

@Override

857

public String print(SequenceI[] sqs, boolean jvsuffix)

858

{

859

System.out.println("Use printGffFormat() or printJalviewFormat()");

return null;

}

/**

* Returns features output in GFF2 format

865

866

* @param sequences

867

* the sequences whose features are to be output

868

* @param visible

869

* a map whose keys are the type names of visible features

870

* @param visibleFeatureGroups

871

* @param includeNonPositionalFeatures

872

* @return

873

874

public String printGffFormat(SequenceI[] sequences,

875

Map<String, FeatureColourI> visible,

876

List<String> visibleFeatureGroups,

877

boolean includeNonPositionalFeatures)

878

{

879

StringBuilder out = new StringBuilder(256);

880

881

out.append(String.format("%s %d\n", GFF_VERSION, gffVersion == 0 ? 2 : gffVersion));

882

883

if (!includeNonPositionalFeatures

884

&& (visible == null || visible.isEmpty()))

885

{

886

return out.toString();

887

}

888

889

String[] types = visible == null ? new String[0] : visible.keySet()

890

.toArray(

891

new String[visible.keySet().size()]);

892

893

for (SequenceI seq : sequences)

894

{

895

List<SequenceFeature> features = new ArrayList<>();

896

if (includeNonPositionalFeatures)

897

{

898

features.addAll(seq.getFeatures().getNonPositionalFeatures());

899

}

900

if (visible != null && !visible.isEmpty())

901

{

902

features.addAll(seq.getFeatures().getPositionalFeatures(types));

903

}

904

905

for (SequenceFeature sf : features)

906

{

907

String source = sf.featureGroup;

908

if (!sf.isNonPositional() && source != null

909

&& !visibleFeatureGroups.contains(source))

910

{

911

// group is not visible

continue;

}

if (source == null)

{

source = sf.getDescription();

918

}

919

920

out.append(seq.getName());

out.append(TAB);

out.append(source);

out.append(TAB);

out.append(sf.type);

out.append(TAB);

out.append(sf.begin);

out.append(TAB);

out.append(sf.end);

out.append(TAB);

out.append(sf.score);

931

out.append(TAB);

932

933

int strand = sf.getStrand();

934

out.append(strand == 1 ? "+" : (strand == -1 ? "-" : "."));

935

out.append(TAB);

936

937

String phase = sf.getPhase();

938

out.append(phase == null ? "." : phase);

939

940

// miscellaneous key-values (GFF column 9)

941

String attributes = sf.getAttributes();

942

if (attributes != null)

943

{

944

out.append(TAB).append(attributes);

}

out.append(newline);

}

}

return out.toString();

}

/**

* Returns a mapping given list of one or more Align descriptors (exonerate

956

* format)

957

958

* @param alignedRegions

959

* a list of "Align fromStart toStart fromCount"

960

* @param mapIsFromCdna

961

* if true, 'from' is dna, else 'from' is protein

962

* @param strand

963

* either 1 (forward) or -1 (reverse)

964

* @return

965

* @throws IOException

966

967

protected MapList constructCodonMappingFromAlign(

968

List<String> alignedRegions, boolean mapIsFromCdna, int strand)

throws IOException

{

if (strand == 0)

{

throw new IOException(

974

"Invalid strand for a codon mapping (cannot be 0)");

975

}

976

int regions = alignedRegions.size();

977

// arrays to hold [start, end] for each aligned region

978

int[] fromRanges = new int[regions * 2]; // from dna

979

int[] toRanges = new int[regions * 2]; // to protein

980

int fromRangesIndex = 0;

981

int toRangesIndex = 0;

982

983

for (String range : alignedRegions)

984

{

985

986

* Align mapFromStart mapToStart mapFromCount

987

* e.g. if mapIsFromCdna

988

* Align 11270 143 120

989

* means:

990

* 120 bases from pos 11270 align to pos 143 in peptide

991

* if !mapIsFromCdna this would instead be

992

* Align 143 11270 40

993

994

String[] tokens = range.split(" ");

995

if (tokens.length != 3)

996

{

997

throw new IOException("Wrong number of fields for Align");

}

int fromStart = 0;

int toStart = 0;

int fromCount = 0;

try

{

fromStart = Integer.parseInt(tokens[0]);

1005

toStart = Integer.parseInt(tokens[1]);

1006

fromCount = Integer.parseInt(tokens[2]);

1007

} catch (NumberFormatException nfe)

1008

{

1009

throw new IOException(

1010

"Invalid number in Align field: " + nfe.getMessage());

}

* Jalview always models from dna to protein, so adjust values if the

1015

* GFF mapping is from protein to dna

if (!mapIsFromCdna)

{

fromCount *= 3;

int temp = fromStart;

fromStart = toStart;

toStart = temp;

}

fromRanges[fromRangesIndex++] = fromStart;

1025

fromRanges[fromRangesIndex++] = fromStart + strand * (fromCount - 1);

1026

1027

1028

* If a codon has an intron gap, there will be contiguous 'toRanges';

1029

* this is handled for us by the MapList constructor.

1030

* (It is not clear that exonerate ever generates this case)

1031

1032

toRanges[toRangesIndex++] = toStart;

1033

toRanges[toRangesIndex++] = toStart + (fromCount - 1) / 3;

1034

}

1035

1036

return new MapList(fromRanges, toRanges, 3, 1);

}

/**

* Parse a GFF format feature. This may include creating a 'dummy' sequence to

1041

* hold the feature, or for its mapped sequence, or both, to be resolved

1042

* either later in the GFF file (##FASTA section), or when the user loads

1043

* additional sequences.

* @param gffColumns

* @param alignment

* @param relaxedIdMatching

* @param newseqs

* @return

protected SequenceI parseGff(String[] gffColumns, AlignmentI alignment,

1052

boolean relaxedIdMatching, List<SequenceI> newseqs)

1053

{

1054

1055

* GFF: seqid source type start end score strand phase [attributes]

1056

1057

if (gffColumns.length < 5)

1058

{

1059

System.err.println("Ignoring GFF feature line with too few columns ("

1060

+ gffColumns.length + ")");

return null;

}

* locate referenced sequence in alignment _or_

1066

* as a forward or external reference (SequenceDummy)

1067

1068

String seqId = gffColumns[0];

1069

SequenceI seq = findSequence(seqId, alignment, newseqs,

1070

relaxedIdMatching);

1071

1072

SequenceFeature sf = null;

1073

GffHelperI helper = GffHelperFactory.getHelper(gffColumns);

if (helper != null)

{

try

{

sf = helper.processGff(seq, gffColumns, alignment, newseqs,

relaxedIdMatching);

if (sf != null)

{

seq.addSequenceFeature(sf);

1083

while ((seq = alignment.findName(seq, seqId, true)) != null)

1084

{

1085

seq.addSequenceFeature(new SequenceFeature(sf));

1086

}

1087

}

1088

} catch (IOException e)

1089

{

1090

System.err.println("GFF parsing failed with: " + e.getMessage());

return null;

}

}

return seq;

}

/**

* Process the 'column 9' data of the GFF file. This is less formally defined,

1100

* and its interpretation will vary depending on the tool that has generated

* it.

* @param attributes

* @param sf

protected void processGffColumnNine(String attributes, SequenceFeature sf)

1107

{

1108

sf.setAttributes(attributes);

1109

1110

1111

* Parse attributes in column 9 and add them to the sequence feature's

1112

* 'otherData' table; use Note as a best proxy for description

1113

1114

char nameValueSeparator = gffVersion == 3 ? '=' : ' ';

1115

// TODO check we don't break GFF2 values which include commas here

1116

Map<String, List<String>> nameValues = GffHelperBase

1117

.parseNameValuePairs(attributes, ";", nameValueSeparator, ",");

1118

for (Entry<String, List<String>> attr : nameValues.entrySet())

1119

{

1120

String values = StringUtils.listToDelimitedString(attr.getValue(),

1121

"; ");

1122

sf.setValue(attr.getKey(), values);

1123

if (NOTE.equals(attr.getKey()))

1124

{

1125

sf.setDescription(values);

}

}

}

/**

* After encountering ##fasta in a GFF3 file, process the remainder of the

1132

* file as FAST sequence data. Any placeholder sequences created during

1133

* feature parsing are updated with the actual sequences.

* @param align

* @param newseqs

* @throws IOException

1138

1139

protected void processAsFasta(AlignmentI align, List<SequenceI> newseqs)

throws IOException

{

try

{

mark();

} catch (IOException q)

1146

{

1147

}

1148

FastaFile parser = new FastaFile(this);

1149

List<SequenceI> includedseqs = parser.getSeqs();

1150

1151

SequenceIdMatcher smatcher = new SequenceIdMatcher(newseqs);

1152

1153

1154

* iterate over includedseqs, and replacing matching ones with newseqs

1155

* sequences. Generic iterator not used here because we modify

1156

* includedseqs as we go

1157

1158

for (int p = 0, pSize = includedseqs.size(); p < pSize; p++)

1159

{

1160

// search for any dummy seqs that this sequence can be used to update

1161

SequenceI includedSeq = includedseqs.get(p);

1162

SequenceI dummyseq = smatcher.findIdMatch(includedSeq);

1163

if (dummyseq != null && dummyseq instanceof SequenceDummy)

1164

{

1165

// probably have the pattern wrong

1166

// idea is that a flyweight proxy for a sequence ID can be created for

1167

// 1. stable reference creation

1168

// 2. addition of annotation

1169

// 3. future replacement by a real sequence

1170

// current pattern is to create SequenceDummy objects - a convenience

1171

// constructor for a Sequence.

1172

// problem is that when promoted to a real sequence, all references

1173

// need to be updated somehow. We avoid that by keeping the same object.

1174

((SequenceDummy) dummyseq).become(includedSeq);

1175

dummyseq.createDatasetSequence();

1176

1177

1178

* Update mappings so they are now to the dataset sequence

1179

1180

for (AlignedCodonFrame mapping : align.getCodonFrames())

1181

{

1182

mapping.updateToDataset(dummyseq);

}

* replace parsed sequence with the realised forward reference

1187

1188

includedseqs.set(p, dummyseq);

1189

1190

1191

* and remove from the newseqs list

1192

1193

newseqs.remove(dummyseq);

}

}

* finally add sequences to the dataset

1199

1200

for (SequenceI seq : includedseqs)

1201

{

1202

// experimental: mapping-based 'alignment' to query sequence

1203

AlignmentUtils.alignSequenceAs(seq, align,

1204

String.valueOf(align.getGapCharacter()), false, true);

1205

1206

// rename sequences if GFF handler requested this

1207

// TODO a more elegant way e.g. gffHelper.postProcess(newseqs) ?

1208

List<SequenceFeature> sfs = seq.getFeatures().getPositionalFeatures();

1209

if (!sfs.isEmpty())

1210

{

1211

String newName = (String) sfs.get(0).getValue(

1212

GffHelperI.RENAME_TOKEN);

1213

if (newName != null)

1214

{

1215

seq.setName(newName);

1216

}

1217

}

1218

align.addSequence(seq);

}

}

/**

* Process a ## directive

* @param line

* @param gffProps

* @param align

* @param newseqs

* @throws IOException

1230

1231

protected void processGffPragma(String line, Map<String, String> gffProps,

1232

AlignmentI align, List<SequenceI> newseqs) throws IOException

1233

{

1234

line = line.trim();

1235

if ("###".equals(line))

1236

{

1237

// close off any open 'forward references'

return;

}

String[] tokens = line.substring(2).split(" ");

1242

String pragma = tokens[0];

1243

String value = tokens.length == 1 ? null : tokens[1];

1244

1245

if ("gff-version".equalsIgnoreCase(pragma))

{

if (value != null)

{

try

{

// value may be e.g. "3.1.2"

1252

gffVersion = Integer.parseInt(value.split("\\.")[0]);

1253

} catch (NumberFormatException e)

{

// ignore

}

}

}

else if ("sequence-region".equalsIgnoreCase(pragma))

1260

{

1261

// could capture <seqid start end> if wanted here

1262

}

1263

else if ("feature-ontology".equalsIgnoreCase(pragma))

1264

{

1265

// should resolve against the specified feature ontology URI

1266

}

1267

else if ("attribute-ontology".equalsIgnoreCase(pragma))

1268

{

1269

// URI of attribute ontology - not currently used in GFF3

1270

}

1271

else if ("source-ontology".equalsIgnoreCase(pragma))

1272

{

1273

// URI of source ontology - not currently used in GFF3

1274

}

1275

else if ("species-build".equalsIgnoreCase(pragma))

1276

{

1277

// save URI of specific NCBI taxon version of annotations

1278

gffProps.put("species-build", value);

1279

}

1280

else if ("fasta".equalsIgnoreCase(pragma))

1281

{

1282

// process the rest of the file as a fasta file and replace any dummy

1283

// sequence IDs

1284

processAsFasta(align, newseqs);

}

else

{

System.err.println("Ignoring unknown pragma: " + line);

1289

}

1290

}

1291

}

jalviewX

File FeaturesFile.java

Coverage histogram

Code metrics

Classes

Class FeaturesFile

Contributing tests

Contributing tests

Source view