File TCoffeeScoreFile.java

Branches:

Statements:

165

Methods:

Classes:

LOC:

656

NCLOC:

390

Total complexity:

Complexity density:

0.46

Statements/Method:

8.68

Methods/Class:

6.33

Average method complexity:

Classes

Class	Line #	Total Statements	Complexity	Uncovered Elements	TOTAL Coverage
TCoffeeScoreFile	92	159	70	72	0.71984434 (72%)
TCoffeeScoreFile.Header	500	2	3	1	0.8333333 (83.3%)
TCoffeeScoreFile.Block	524	4	3	0	1.0 (100%)

Class TCoffeeScoreFile

Class TCoffeeScoreFile	Line # 92	Total Statements 159	Complexity 70	Uncovered Elements 72	TOTAL Coverage 0.71984434 (72%)
TCoffeeScoreFile(Object,DataSourceType) TCoffeeScoreFile(Object,DataSourceType)	126126	1.01	1.01	0.00	1.0 1.0100%
TCoffeeScoreFile(FileParse) TCoffeeScoreFile(FileParse)	134134	1.01	1.01	1.01	0.0 0.00%
getHeight() : int getHeight() : int	157157	1.01	3.03	1.01	0.6666667 0.666666766.7%
getWidth() : int getWidth() : int	171171	1.01	2.02	1.01	0.6666667 0.666666766.7%
getScoresFor(String) : String getScoresFor(String) : String	184184	1.01	3.03	1.01	0.6666667 0.666666766.7%
getScoresList() : List<String> getScoresList() : List<String>	195195	6.06	2.02	2.02	0.75 0.7575%
getScoresArray() : byte[][] getScoresArray() : byte[][]	213213	12.012	5.05	2.02	0.8888889 0.888888988.9%
parse() : void parse() : void	240240	24.024	6.06	10.010	0.7058824 0.705882470.6%
parseInt(String) : int parseInt(String) : int	309309	3.03	2.02	1.01	0.6666667 0.666666766.7%
readHeader(FileParse) : Header readHeader(FileParse) : Header	330330	34.034	15.015	22.022	0.5925926 0.592592659.3%
error(FileParse,String) : void error(FileParse,String) : void	412412	4.04	2.02	2.02	0.6666667 0.666666766.7%
readBlock(FileParse,int) : Block readBlock(FileParse,int) : Block	437437	21.021	9.09	11.011	0.6857143 0.685714368.6%
annotateAlignment(AlignmentI,boolean) : boolean annotateAlignment(AlignmentI,boolean) : boolean	556556	49.049	18.018	15.015	0.79452056 0.7945205679.5%
print(SequenceI[],boolean) : String print(SequenceI[],boolean) : String	650650	1.01	1.01	1.01	0.0 0.00%

Class TCoffeeScoreFile.Header

Class TCoffeeScoreFile.Header	Line # 500	Total Statements 2	Complexity 3	Uncovered Elements 1	TOTAL Coverage 0.8333333 (83.3%)
getScoreAvg() : int getScoreAvg() : int	508508	1.01	1.01	0.00	1.0 1.0100%
getScoreFor(String) : int getScoreFor(String) : int	513513	1.01	2.02	1.01	0.6666667 0.666666766.7%

Class TCoffeeScoreFile.Block

Class TCoffeeScoreFile.Block	Line # 524	Total Statements 4	Complexity 3	TOTAL Coverage 1.0 (100%)
Block(int) Block(int)	530530	2.02	1.01	1.0 1.0100%
getScoresFor(String) : String getScoresFor(String) : String	536536	1.01	1.01	1.0 1.0100%
getConsensus() : String getConsensus() : String	541541	1.01	1.01	1.0 1.0100%

Contributing tests

This file is covered by 11 tests.

Contributing tests

Test contribution	Test	Result
0.6333333	jalview.io.Jalview2xmlTests.testTCoffeeScoresjalview.io.Jalview2xmlTests.testTCoffeeScores	1PASS
0.37407407	jalview.io.TCoffeeScoreFileTest.testGetAsArrayjalview.io.TCoffeeScoreFileTest.testGetAsArray	1PASS
0.33703703	jalview.io.TCoffeeScoreFileTest.testGetAsListjalview.io.TCoffeeScoreFileTest.testGetAsList	1PASS
0.33703703	jalview.io.TCoffeeScoreFileTest.testHeightAndWidthWithResidueNumbersjalview.io.TCoffeeScoreFileTest.testHeightAndWidthWithResidueNumbers	1PASS
0.33333334	jalview.io.TCoffeeScoreFileTest.testHeightAndWidthjalview.io.TCoffeeScoreFileTest.testHeightAndWidth	1PASS
0.32222223	jalview.io.TCoffeeScoreFileTest.testReadHeaderjalview.io.TCoffeeScoreFileTest.testReadHeader	1PASS
0.32222223	jalview.io.TCoffeeScoreFileTest.testParsejalview.io.TCoffeeScoreFileTest.testParse	1PASS
0.1037037	jalview.io.TCoffeeScoreFileTest.testReadBlockjalview.io.TCoffeeScoreFileTest.testReadBlock	1PASS
0.09259259	jalview.io.gff.GffTests.testResolveExonerateGffjalview.io.gff.GffTests.testResolveExonerateGff	1PASS
0.09259259	jalview.io.gff.ExonerateHelperTest.testAddExonerateGffToAlignmentjalview.io.gff.ExonerateHelperTest.testAddExonerateGffToAlignment	1PASS
0.09259259	jalview.io.TCoffeeScoreFileTest.testWrongFilejalview.io.TCoffeeScoreFileTest.testWrongFile	1PASS

Source view

* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)

* Copyright (C) $$Year-Rel$$ The Jalview Authors

* This file is part of Jalview.

* Jalview is free software: you can redistribute it and/or

* modify it under the terms of the GNU General Public License

* as published by the Free Software Foundation, either version 3

* of the License, or (at your option) any later version.

* Jalview is distributed in the hope that it will be useful, but

* WITHOUT ANY WARRANTY; without even the implied warranty

* of MERCHANTABILITY or FITNESS FOR A PARTICULAR

* PURPOSE. See the GNU General Public License for more details.

* You should have received a copy of the GNU General Public License

* along with Jalview. If not, see <http://www.gnu.org/licenses/>.

* The Jalview Authors are detailed in the 'AUTHORS' file.

package jalview.io;

import jalview.analysis.SequenceIdMatcher;

import jalview.datamodel.AlignmentAnnotation;

import jalview.datamodel.AlignmentI;

import jalview.datamodel.Annotation;

import jalview.datamodel.SequenceI;

import java.awt.Color;

import java.io.IOException;

import java.util.ArrayList;

import java.util.HashMap;

import java.util.LinkedHashMap;

import java.util.List;

import java.util.Map;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

/**

* A file parser for T-Coffee score ascii format. This file contains the

* alignment consensus for each residue in any sequence.

* <p>

* This file is produced by <code>t_coffee</code> providing the option

* <code>-output=score_ascii </code> to the program command line

* An example file is the following

* <pre>

* T-COFFEE, Version_9.02.r1228 (2012-02-16 18:15:12 - Revision 1228 - Build 336)

* Cedric Notredame

* CPU TIME:0 sec.

* SCORE=90

* *

* BAD AVG GOOD

* *

* 1PHT : 89

* 1BB9 : 90

* 1UHC : 94

* 1YCS : 94

* 1OOT : 93

* 1ABO : 94

* 1FYN : 94

* 1QCF : 94

* cons : 90

* 1PHT 999999999999999999999999998762112222543211112134

* 1BB9 99999999999999999999999999987-------4322----2234

* 1UHC 99999999999999999999999999987-------5321----2246

* 1YCS 99999999999999999999999999986-------4321----1-35

* 1OOT 999999999999999999999999999861-------3------1135

* 1ABO 99999999999999999999999999986-------422-------34

* 1FYN 99999999999999999999999999985-------32--------35

* 1QCF 99999999999999999999999999974-------2---------24

* cons 999999999999999999999999999851000110321100001134

* 1PHT ----------5666642367889999999999889

* 1BB9 1111111111676653-355679999999999889

* 1UHC ----------788774--66789999999999889

* 1YCS ----------78777--356789999999999889

* 1OOT ----------78877--356789999999997-67

* 1ABO ----------687774--56779999999999889

* 1FYN ----------6888842356789999999999889

* 1QCF ----------6878742356789999999999889

* cons 00100000006877641356789999999999889

* </pre>

* @author Paolo Di Tommaso

public class TCoffeeScoreFile extends AlignFile

{

/**

* TCOFFEE score colourscheme

static final Color[] colors = { new Color(102, 102, 255), // 0: lilac #6666FF

new Color(0, 255, 0), // 1: green #00FF00

100

new Color(102, 255, 0), // 2: lime green #66FF00

101

new Color(204, 255, 0), // 3: greeny yellow #CCFF00

102

new Color(255, 255, 0), // 4: yellow #FFFF00

103

new Color(255, 204, 0), // 5: orange #FFCC00

104

new Color(255, 153, 0), // 6: deep orange #FF9900

105

new Color(255, 102, 0), // 7: ochre #FF6600

106

new Color(255, 51, 0), // 8: red #FF3300

107

new Color(255, 34, 0) // 9: redder #FF2000

108

};

109

110

public final static String TCOFFEE_SCORE = "TCoffeeScore";

111

112

static Pattern SCORES_WITH_RESIDUE_NUMS = Pattern

113

.compile("^\\d+\\s([^\\s]+)\\s+\\d+$");

114

115

/** The {@link Header} structure holder */

Header header;

/**

* Holds the consensues values for each sequences. It uses a LinkedHashMap to

120

* maintaint the insertion order.

121

122

LinkedHashMap<String, StringBuilder> scores;

Integer fWidth;

/**
 * Creates a T-Coffee score file reader by delegating to the superclass
 * constructor.
 *
 * @param inFile
 *          the input to read; may be a File or a String (BH 2018)
 * @param fileSourceType
 *          how {@code inFile} should be interpreted; passed through unchanged
 * @throws IOException
 *           propagated from the superclass constructor
 */
public TCoffeeScoreFile(Object inFile, DataSourceType fileSourceType)
        throws IOException
{
  // BH 2018 allows File or String
  super(inFile, fileSourceType);
}

/**
 * Creates a T-Coffee score file reader on top of an already opened source by
 * delegating to the superclass constructor.
 *
 * @param source
 *          the data source to read from
 * @throws IOException
 *           propagated from the superclass constructor
 */
public TCoffeeScoreFile(FileParse source) throws IOException
{
  super(source);
}

/**

* Parse the provided reader for the T-Coffee scores file format

141

142

* @param reader

143

* public static TCoffeeScoreFile load(Reader reader) {

144

145

* try { BufferedReader in = (BufferedReader) (reader instanceof

146

* BufferedReader ? reader : new BufferedReader(reader));

147

* TCoffeeScoreFile result = new TCoffeeScoreFile();

148

* result.doParsing(in); return result.header != null &&

149

* result.scores != null ? result : null; } catch( Exception e) {

150

* throw new RuntimeException(e); } }

/**

* @return The 'height' of the score matrix i.e. the numbers of score rows

155

* that should matches the number of sequences in the alignment

156

157

public int getHeight()

158

{

159

// the last entry will always be the 'global' alingment consensus scores, so

160

// it is removed

161

// from the 'height' count to make this value compatible with the number of

162

// sequences in the MSA

163

return scores != null && scores.size() > 0 ? scores.size() - 1 : 0;

}

/**

* @return The 'width' of the score matrix i.e. the number of columns. Since

168

* the score value are supposed to be calculated for an 'aligned' MSA,

169

* all the entries have to have the same width.

170

171

public int getWidth()

172

{

173

return fWidth != null ? fWidth : 0;

}

/**

* Get the string of score values for the specified seqeunce ID.

* @param id

* The sequence ID

* @return The scores as a string of values e.g. {@code 99999987-------432}.

182

* It return an empty string when the specified ID is missing.

183

184

public String getScoresFor(String id)

185

{

186

return scores != null && scores.containsKey(id)

187

? scores.get(id).toString()

: "";

}

/**

* @return The list of score string as a {@link List} object, in the same

193

* ordeer of the insertion i.e. in the MSA

194

195

public List<String> getScoresList()

{

if (scores == null)

{

return null;

}

List<String> result = new ArrayList<String>(scores.size());

202

for (Map.Entry<String, StringBuilder> it : scores.entrySet())

203

{

204

result.add(it.getValue().toString());

}

return result;

}

/**

* @return The parsed score values a matrix of bytes

212

213

public byte[][] getScoresArray()

{

if (scores == null)

{

return null;

}

byte[][] result = new byte[scores.size()][];

220

221

int rowCount = 0;

222

for (Map.Entry<String, StringBuilder> it : scores.entrySet())

223

{

224

String line = it.getValue().toString();

225

byte[] seqValues = new byte[line.length()];

226

3284

for (int j = 0, c = line.length(); j < c; j++)

227

{

228

229

3259

byte val = (byte) (line.charAt(j) - '0');

230

231

3259

seqValues[j] = (val >= 0 && val <= 9) ? val : -1;

232

}

233

234

result[rowCount++] = seqValues;

}

return result;

}

@Override

public void parse() throws IOException

{

* read the header

header = readHeader(this);

if (header == null)

{

error = true;

return;

}

scores = new LinkedHashMap<String, StringBuilder>();

254

255

256

* initilize the structure

257

258

for (Map.Entry<String, Integer> entry : header.scores.entrySet())

259

{

260

scores.put(entry.getKey(), new StringBuilder());

}

* go with the reading

265

266

Block block;

267

while ((block = readBlock(this, header.scores.size())) != null)

{

* append sequences read in the block

272

273

for (Map.Entry<String, String> entry : block.items.entrySet())

274

{

275

166

StringBuilder scoreStringBuilder = scores.get(entry.getKey());

276

166

if (scoreStringBuilder == null)

277

{

278

error = true;

279

errormessage = String.format(

280

"Invalid T-Coffee score file: Sequence ID '%s' is not declared in header section",

entry.getKey());

return;

}

166

scoreStringBuilder.append(entry.getValue());

}

}

* verify that all rows have the same width

291

292

for (StringBuilder str : scores.values())

{

if (fWidth == null)

{

fWidth = str.length();

297

}

298

else if (fWidth != str.length())

299

{

300

error = true;

301

errormessage = "Invalid T-Coffee score file: All the score sequences must have the same length";

return;

}

}

return;

}

static int parseInt(String str)

{

try

{

return Integer.parseInt(str);

314

} catch (NumberFormatException e)

315

{

316

// TODO report a warning ?

return 0;

}

}

/**

* Reaad the header section in the T-Coffee score file format

* @param reader

* The scores reader

* @return The parser {@link Header} instance

327

* @throws RuntimeException

328

* when the header is not in the expected format

329

330

static Header readHeader(FileParse reader) throws IOException

331

{

332

333

Header result = null;

334

try

335

{

336

result = new Header();

337

result.head = reader.nextLine();

String line;

while ((line = reader.nextLine()) != null)

342

{

343

if (line.startsWith("SCORE="))

344

{

345

result.score = parseInt(line.substring(6).trim());

break;

}

}

if ((line = reader.nextLine()) == null || !"*".equals(line.trim()))

351

{

352

error(reader,

353

"Invalid T-COFFEE score format (NO BAD/AVG/GOOD header)");

354

return null;

355

}

356

if ((line = reader.nextLine()) == null

357

|| !"BAD AVG GOOD".equals(line.trim()))

358

{

359

error(reader,

360

"Invalid T-COFFEE score format (NO BAD/AVG/GOOD header)");

361

return null;

362

}

363

if ((line = reader.nextLine()) == null || !"*".equals(line.trim()))

364

{

365

error(reader,

366

"Invalid T-COFFEE score format (NO BAD/AVG/GOOD header)");

return null;

}

* now are expected a list if sequences ID up to the first blank line

372

373

while ((line = reader.nextLine()) != null)

{

if ("".equals(line))

{

break;

}

int p = line.indexOf(":");

381

if (p == -1)

382

{

383

// TODO report a warning

continue;

}

String id = line.substring(0, p).trim();

388

int val = parseInt(line.substring(p + 1).trim());

389

if ("".equals(id))

390

{

391

// TODO report warning

continue;

}

result.scores.put(id, val);

}

if (result == null)

{

error(reader, "T-COFFEE score file had no per-sequence scores");

401

}

402

403

} catch (IOException e)

404

{

405

error(reader, "Unexpected problem parsing T-Coffee score ascii file");

throw e;

}

return result;

}

private static void error(FileParse reader, String errm)

413

{

414

reader.error = true;

415

if (reader.errormessage == null)

416

{

417

reader.errormessage = errm;

}

else

{

reader.errormessage += "\n" + errm;

}

}

/**

* Read a scores block ihe provided stream.

427

428

* @param reader

429

* The stream to parse

430

* @param size

431

* The expected number of the sequence to be read

432

* @return The {@link Block} instance read or {link null} null if the end of

433

* file has reached.

434

* @throws IOException

435

* Something went wrong on the 'wire'

436

437

static Block readBlock(FileParse reader, int size) throws IOException

438

{

439

Block result = new Block(size);

String line;

* read blank lines (eventually)

444

445

while ((line = reader.nextLine()) != null && "".equals(line.trim()))

446

{

447

// consume blank lines

}

if (line == null)

{

return null;

}

* read the scores block

{

187

if ("".equals(line.trim()))

{

// terminated

break;

}

// split the line on the first blank

467

// the first part have to contain the sequence id

468

// the remaining part are the scores values

469

175

int p = line.indexOf(" ");

470

175

if (p == -1)

471

{

472

if (reader.warningMessage == null)

473

{

474

reader.warningMessage = "";

475

}

476

reader.warningMessage += "Possible parsing error - expected to find a space in line: '"

+ line + "'\n";

continue;

}

175

String id = line.substring(0, p).trim();

482

175

String val = line.substring(p + 1).trim();

483

484

175

Matcher m = SCORES_WITH_RESIDUE_NUMS.matcher(val);

485

175

if (m.matches())

{

val = m.group(1);

}

175

result.items.put(id, val);

491

492

} while ((line = reader.nextLine()) != null);

return result;

}

* The score file header

static class Header

{

String head;

int score;

LinkedHashMap<String, Integer> scores = new LinkedHashMap<String, Integer>();

507

508

public int getScoreAvg()

{

return score;

}

public int getScoreFor(String ID)

514

{

515

516

return scores.containsKey(ID) ? scores.get(ID) : -1;

}

}

* Hold a single block values block in the score file

static class Block

{

int size;

Map<String, String> items;

529

530

public Block(int size)

531

{

532

this.size = size;

533

this.items = new HashMap<String, String>(size);

534

}

535

536

String getScoresFor(String id)

537

{

538

return items.get(id);

539

}

540

541

String getConsensus()

542

{

543

return items.get("cons");

}

}

/**

* generate annotation for this TCoffee score set on the given alignment

549

550

* @param al

551

* alignment to annotate

552

* @param matchids

553

* if true, annotate sequences based on matching sequence names

554

* @return true if alignment annotation was modified, false otherwise.

555

556

public boolean annotateAlignment(AlignmentI al, boolean matchids)

557

{

558

if (al.getHeight() != getHeight() || al.getWidth() != getWidth())

559

{

560

String info = String.format(

561

"align w: %s, h: %s; score: w: %s; h: %s ", al.getWidth(),

562

al.getHeight(), getWidth(), getHeight());

563

warningMessage = "Alignment shape does not match T-Coffee score file shape -- "

+ info;

return false;

}

boolean added = false;

568

int i = 0;

569

SequenceIdMatcher sidmatcher = new SequenceIdMatcher(

570

al.getSequencesArray());

571

byte[][] scoreMatrix = getScoresArray();

572

// for 2.8 - we locate any existing TCoffee annotation and remove it first

573

// before adding this.

574

for (Map.Entry<String, StringBuilder> id : scores.entrySet())

575

{

576

byte[] srow = scoreMatrix[i];

SequenceI s;

if (matchids)

{

s = sidmatcher.findIdMatch(id.getKey());

}

else

{

s = al.getSequenceAt(i);

585

}

586

i++;

587

if (s == null && i != scores.size() && !id.getKey().equals("cons"))

588

{

589

System.err

590

.println("No " + (matchids ? "match " : " sequences left ")

591

+ " for TCoffee score set : " + id.getKey());

592

continue;

593

}

594

int jSize = al.getWidth() < srow.length ? al.getWidth() : srow.length;

595

Annotation[] annotations = new Annotation[al.getWidth()];

596

2528

for (int j = 0; j < jSize; j++)

597

{

598

2512

byte val = srow[j];

599

2512

if (s != null && jalview.util.Comparison.isGap(s.getCharAt(j)))

600

{

601

334

annotations[j] = null;

602

334

if (val > 0)

603

{

604

System.err.println(

605

"Warning: non-zero value for positional T-COFFEE score for gap at "

606

+ j + " in sequence " + s.getName());

}

}

else

{

2178

annotations[j] = new Annotation(s == null ? "" + val : null,

612

2178

s == null ? "" + val : null, '\0', val * 1f,

613

2178

val >= 0 && val < colors.length ? colors[val]

: Color.white);

}

}

// this will overwrite any existing t-coffee scores for the alignment

618

AlignmentAnnotation aa = al.findOrCreateAnnotation(TCOFFEE_SCORE,

619

TCOFFEE_SCORE, false, s, null);

620

if (s != null)

621

{

622

aa.label = "T-COFFEE";

623

aa.description = "" + id.getKey();

624

aa.annotations = annotations;

625

aa.visible = false;

626

aa.belowAlignment = false;

627

aa.setScore(header.getScoreFor(id.getKey()));

628

aa.createSequenceMapping(s, s.getStart(), true);

629

s.addAlignmentAnnotation(aa);

630

aa.adjustForAlignment();

}

else

{

aa.graph = AlignmentAnnotation.NO_GRAPH;

635

aa.label = "T-COFFEE";

636

aa.description = "TCoffee column reliability score";

637

aa.annotations = annotations;

638

aa.belowAlignment = true;

639

aa.visible = true;

640

aa.setScore(header.getScoreAvg());

641

}

642

aa.showAllColLabels = true;

643

aa.validateRangeAndDisplay();

added = true;

}

return added;

}

@Override

public String print(SequenceI[] sqs, boolean jvsuffix)

652

{

653

// TODO Auto-generated method stub

654

return "Not valid.";

655

}

656

}

jalviewX

File TCoffeeScoreFile.java

Coverage histogram

Code metrics

Classes

Class TCoffeeScoreFile

Class TCoffeeScoreFile.Header

Class TCoffeeScoreFile.Block

Contributing tests

Contributing tests

Source view