File ValueAnnotationDistanceModel.java

Branches:

Statements:

132

Methods:

Classes:

LOC:

475

NCLOC:

308

Total complexity:

Complexity density:

0.44

Statements/Method:

10.15

Methods/Class:

Average method complexity:

4.46

Classes

Class	Line #	Total Statements	Complexity	TOTAL Coverage	Actions
ValueAnnotationDistanceModel	49	132	58	0.0414746554.1%

Class ValueAnnotationDistanceModel

Class ValueAnnotationDistanceModel	Line # 49	Total Statements 132	Complexity 58	TOTAL Coverage 0.0414746554.1%
ValueAnnotationDistanceModel() ValueAnnotationDistanceModel()	6060	0.00	1.01	-1.0 -1.0 -
getInstance(AlignmentViewPanel) : ScoreModelI getInstance(AlignmentViewPanel) : ScoreModelI	6565	8.08	3.03	0.0 0.00%
configureFromAlignmentView(AlignmentViewPanel) : boolean configureFromAlignmentView(AlignmentViewPanel) : boolean	8585	2.02	1.01	0.0 0.00%
expandSeqData(SequenceI[],AlignmentView,SimilarityParamsI,List<String>,ArrayList<AlignmentAnnotation>,HashMap<Integer, String>) : SequenceI[] expandSeqData(SequenceI[],AlignmentView,SimilarityParamsI,List<String>,ArrayList<AlignmentAnnotation>,HashMap<Integer, String>) : SequenceI[]	9494	33.033	9.09	0.0 0.00%
configureMode(boolean,boolean,boolean) : void configureMode(boolean,boolean,boolean) : void	184184	3.03	1.01	0.0 0.00%
findDistances(AlignmentView,SimilarityParamsI) : MatrixI findDistances(AlignmentView,SimilarityParamsI) : MatrixI	209209	74.074	32.032	0.0 0.00%
findSeqsWithoutGapAtColumn(SeqCigar[],int) : Set<SeqCigar> findSeqsWithoutGapAtColumn(SeqCigar[],int) : Set<SeqCigar>	421421	6.06	2.02	0.0 0.00%
getName() : String getName() : String	439439	1.01	1.01	1.0 1.0100%
getDescription() : String getDescription() : String	445445	1.01	4.04	0.0 0.00%
isDNA() : boolean isDNA() : boolean	451451	1.01	1.01	1.0 1.0100%
isProtein() : boolean isProtein() : boolean	457457	1.01	1.01	1.0 1.0100%
isSecondaryStructure() : boolean isSecondaryStructure() : boolean	463463	1.01	1.01	1.0 1.0100%
toString() : String toString() : String	469469	1.01	1.01	0.0 0.00%

Contributing tests

This file is covered by 3 tests.

Contributing tests

Test contribution	Test	Result
0.02764977	jalview.gui.CalculationChooserTest.testGetApplicableScoreModelsjalview.gui.CalculationChooserTest.testGetApplicableScoreModels	1PASS
0.013824885	jalview.io.cache.JvCacheableInputBoxTest.updateCacheTestjalview.io.cache.JvCacheableInputBoxTest.updateCacheTest	1PASS
0.013824885	jalview.io.ScoreMatrixFileTest.testParse_ncbiFormatjalview.io.ScoreMatrixFileTest.testParse_ncbiFormat	1PASS

Source view

* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)

* Copyright (C) $$Year-Rel$$ The Jalview Authors

* This file is part of Jalview.

* Jalview is free software: you can redistribute it and/or

* modify it under the terms of the GNU General Public License

* as published by the Free Software Foundation, either version 3

* of the License, or (at your option) any later version.

* Jalview is distributed in the hope that it will be useful, but

* WITHOUT ANY WARRANTY; without even the implied warranty

* of MERCHANTABILITY or FITNESS FOR A PARTICULAR

* PURPOSE. See the GNU General Public License for more details.

* You should have received a copy of the GNU General Public License

* along with Jalview. If not, see <http://www.gnu.org/licenses/>.

* The Jalview Authors are detailed in the 'AUTHORS' file.

package jalview.analysis.scoremodels;

import jalview.analysis.AlignmentAnnotationUtils;

import jalview.analysis.AlignmentUtils;

import jalview.api.AlignmentViewPanel;

import jalview.api.FeatureRenderer;

import jalview.api.analysis.ScoreModelI;

import jalview.api.analysis.SimilarityParamsI;

import jalview.bin.Console;

import jalview.datamodel.AlignmentAnnotation;

import jalview.datamodel.AlignmentView;

import jalview.datamodel.Annotation;

import jalview.datamodel.SeqCigar;

import jalview.datamodel.SequenceI;

import jalview.math.Matrix;

import jalview.math.MatrixI;

import jalview.util.Constants;

import java.util.ArrayList;

import java.util.HashMap;

import java.util.HashSet;

import java.util.List;

import java.util.Map;

import java.util.Set;

/**
 * This class contains methods to calculate distance scores between
 * secondary structure annotations of the sequences.
 */

public class ValueAnnotationDistanceModel extends DistanceScoreModel

{

private static final String NAME = "Annotation tracks";

private String description;

FeatureRenderer fr;

/**
 * Constructor. Creates an instance with no feature renderer set; the
 * renderer field is only assigned by configureFromAlignmentView.
 */
public ValueAnnotationDistanceModel()
{

}

@Override

public ScoreModelI getInstance(AlignmentViewPanel view)

{

ValueAnnotationDistanceModel instance;

try

{

instance = this.getClass().getDeclaredConstructor().newInstance();

instance.configureFromAlignmentView(view);

return instance;

} catch (InstantiationException | IllegalAccessException e)

{

jalview.bin.Console.errPrintln("Error in " + getClass().getName()

+ ".getInstance(): " + e.getMessage());

return null;

} catch (ReflectiveOperationException roe)

{

return null;

}

/**
 * Stores a clone of the view's feature renderer in this model.
 *
 * @param view
 *          the alignment view panel whose feature renderer is cloned
 * @return always true
 */
boolean configureFromAlignmentView(AlignmentViewPanel view)
{
  this.fr = view.cloneFeatureRenderer();
  return true;
}

ArrayList<AlignmentAnnotation> ssForSeqs = null;

@Override

public SequenceI[] expandSeqData(SequenceI[] sequences,

AlignmentView seqData, SimilarityParamsI scoreParams,

List<String> labels,

ArrayList<AlignmentAnnotation> ssAnnotationForSeqs,

HashMap<Integer, String> annotationDetails)

100

{

101

ssForSeqs = new ArrayList<AlignmentAnnotation>();

102

List<SequenceI> newSequences = new ArrayList<SequenceI>();

103

List<SeqCigar> newCigs = new ArrayList<SeqCigar>();

104

int sq = 0;

105

106

AlignmentAnnotation[] alignAnnotList = fr.getViewport().getAlignment()

107

.getAlignmentAnnotation();

108

109

110

* Add secondary structure annotations that are added to the annotation track

111

* to the map

112

113

Map<SequenceI, ArrayList<AlignmentAnnotation>> ssAlignmentAnnotationForSequences = AlignmentUtils

114

.getSequenceAssociatedAlignmentAnnotations(alignAnnotList, null,

115

true);

116

117

for (SeqCigar scig : seqData.getSequences())

118

{

119

// get the next sequence that should be bound to this scig: may be null

120

SequenceI alSeq = sequences[sq++];

121

List<AlignmentAnnotation> ssec = ssAlignmentAnnotationForSequences

122

.get(scig.getRefSeq());

123

124

if (ssec == null && scoreParams.getExcludeSeqWithoutAnnot())

{

continue;

}

else if (ssec == null)

129

{

130

// not defined

131

newSequences.add(alSeq);

132

if (alSeq != null)

133

{

134

// labels.add("No Secondary Structure");

135

labels.add(Constants.STRUCTURE_PROVIDERS.get("None"));

136

}

137

SeqCigar newSeqCigar = scig; // new SeqCigar(scig);

138

newCigs.add(newSeqCigar);

ssForSeqs.add(null);

}

else

{

for (int i = 0; i < ssec.size(); i++)

{

if (alSeq != null)

{

// Add annotationDetails if the annotation has

148

// ANNOTATION_DETAILS property value (additional metadata)

149

150

151

String provider = "";

152

153

if (ssec.get(i).hasAnnotationDetailsProperty())

154

{

155

// using key = labels.size() gives the position of the node

156

annotationDetails.put(labels.size(),

157

ssec.get(i).getAnnotationDetailsProperty());

158

} else {

159

annotationDetails.put(labels.size(),

160

ssec.get(i).label+(ssec.get(i).description!=null ? ssec.get(i).description:""));

161

provider = alSeq.getName();

162

}

163

// .extractSSSourceFromAnnotationDescription(ssec.get(i));

164

labels.add(provider);

165

}

166

newSequences.add(alSeq);

167

SeqCigar newSeqCigar = scig; // new SeqCigar(scig);

168

newCigs.add(newSeqCigar);

169

ssForSeqs.add(ssec.get(i));

}

}

}

ssAnnotationForSeqs.addAll(ssForSeqs);

174

seqData.setSequences(newCigs.toArray(new SeqCigar[0]));

175

return newSequences.toArray(new SequenceI[0]);

}

boolean hamming=false; // one not equals other

181

boolean logIt = false; // log1p scale before difference calc

182

boolean relDiff = true; // compute difference of normalised values w.r.t. each rows max/min

183

184

protected void configureMode(boolean hamming, boolean logIt, boolean relDiff) {

185

this.hamming=hamming;

186

this.logIt=logIt;

187

this.relDiff=relDiff;

}

/**

* Calculates distance score [i][j] between each pair of protein sequences

192

* based on their secondary structure annotations (H, E, C). The final score

193

* is normalised by the number of alignment columns processed, providing an

194

* average similarity score.

195

* <p>

196

* The parameters argument can include settings for handling gap-residue

197

* aligned positions and may determine if the score calculation is based on

198

* the longer or shorter sequence in each pair. This can be important for

199

* handling partial alignments or sequences of significantly different

* lengths.

* @param seqData

* The aligned sequence data including secondary structure

204

* annotations.

205

* @param params

206

* Additional parameters for customising the scoring process, such as

207

* gap handling and sequence length consideration.

208

209

@Override

210

public MatrixI findDistances(AlignmentView seqData,

211

SimilarityParamsI params)

212

{

213

configureMode(false,true,true);

214

Console.trace("Starting to calculate: "+getDescription());

215

if (ssForSeqs == null

216

|| ssForSeqs.size() != seqData.getSequences().length)

217

{

218

// expandSeqData needs to be called to initialise the hash

219

SequenceI[] sequences = new SequenceI[seqData.getSequences().length];

220

// we throw away the new labels in this case..

221

expandSeqData(sequences, seqData, params, new ArrayList<String>(),

222

new ArrayList<AlignmentAnnotation>(),

223

new HashMap<Integer, String>());

224

}

225

SeqCigar[] seqs = seqData.getSequences();

226

int noseqs = seqs.length; // no of sequences

227

int cpwidth = 0;

228

double[][] differences = new double[noseqs][noseqs]; // matrix to store

229

// similarity score

230

// secondary structure source parameter selected by the user from the drop

231

// down.

232

String ssSource = params.getSecondaryStructureSource();

233

if (ssSource == null || ssSource == "")

234

{

235

ssSource = Constants.SS_ALL_PROVIDERS;

236

}

237

238

// need to get real position for view position

239

int[] viscont = seqData.getVisibleContigs();

240

241

242

* scan each column, compute and add to each similarity[i, j]

243

* the number of secondary structure annotation that seqi

244

* and seqj do not share

double maxDiff = 0;

float maxVal=0f;

for (int i = 0; i < noseqs; i++)

249

{

250

AlignmentAnnotation aa_i = ssForSeqs.get(i);

251

//if (aa_i.graphMax>22000) { continue; }

252

maxVal = Math.max(maxVal, aa_i.graphMax);

253

maxDiff = Math.max(maxDiff,

254

Math.abs(aa_i.graphMax - Math.min(0, aa_i.graphMin)));

255

}

256

// for (int i=0;i < noseqs;i++)

257

// {

258

// AlignmentAnnotation aa_i = ssForSeqs.get(i);

259

// aa_i.graphMin=0f;

260

// aa_i.graphMax=maxVal;

// }

if (logIt)

{

maxDiff = Math.log1p(maxDiff);

}

Annotation ann1;

Annotation ann2;

for (int vc = 0; vc < viscont.length; vc += 2)

270

{

271

// Iterates for each column position

272

for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)

273

{

274

cpwidth++; // used to normalise the similarity score

275

276

277

* get set of sequences without gap in the current column

278

279

Set<SeqCigar> seqsWithoutGapAtCol = findSeqsWithoutGapAtColumn(seqs,

cpos);

* calculate similarity score for each secondary structure annotation on i'th and j'th

284

* sequence and add this measure to the similarities matrix

285

* for [i, j] for j > i

286

287

288

for (int i = 0; i < (noseqs - 1); i++)

289

{

290

AlignmentAnnotation aa_i = ssForSeqs.get(i);

291

boolean undefinedSS1 = aa_i == null;

292

// check if the sequence contains gap in the current column

293

boolean gap1 = !seqsWithoutGapAtCol.contains(seqs[i]);

294

// secondary structure is fetched only if the current column is not

295

// gap for the sequence

296

double ss1 = 0;

297

if (!gap1 && !undefinedSS1)

298

{

299

// fetch the position in sequence for the column and finds the

300

// corresponding secondary structure annotation

301

// TO DO - consider based on priority and displayed

302

int seqPosition_i = seqs[i].findPosition(cpos);

303

if (aa_i != null)

304

{

305

ann1 = aa_i.getAnnotationForPosition(seqPosition_i);

if (ann1 != null)

{

ss1 = ann1.value;

if (relDiff) {

ss1 = ss1/aa_i.graphMax;

}

if (logIt)

{

ss1 = Math.log1p(ss1);

}

}

}

}

// Iterates for each sequences

320

for (int j = i + 1; j < noseqs; j++)

321

{

322

323

// check if ss is defined

324

AlignmentAnnotation aa_j = ssForSeqs.get(j);

325

326

boolean undefinedSS2 = aa_j == null;

327

328

// Set similarity to max score if both SS are not defined

329

if (undefinedSS1 || undefinedSS2)

330

{

331

differences[i][j] += 0;

continue;

}

// Set similarity to minimum score if either one SS is not defined

336

// else if (undefinedSS1 || undefinedSS2)

337

// {

338

// differences[i][j] += maxDiff;

// continue;

// }

boolean gap2 = !seqsWithoutGapAtCol.contains(seqs[j]);

343

344

// Variable to store secondary structure at the current column

345

double ss2 = 0;

346

347

if (!gap2 && !undefinedSS2)

348

{

349

int seqPosition = seqs[j].findPosition(cpos);

350

ann2 = aa_j.getAnnotationForPosition(seqPosition);

if (ann2 != null)

{

ss2 = ann2.value;

if (relDiff) {

ss2 = ss2/aa_j.graphMax;

}

if (logIt)

{

ss2 = Math.log1p(ss2);

}

}

}

if ((!gap1 && !gap2))

{

if (hamming)

{

differences[i][j]+=(ss1==ss2 ? 0:1);

}

else

{

// Calculate similarity score based on the substitution matrix

373

double similarityScore = Math.abs(ss1 - ss2);

374

if (logIt)

375

{

376

similarityScore = Math.log1p(similarityScore);

377

}

378

differences[i][j] += similarityScore;

}

}

else

{

if (hamming) {differences[i][j]+=1;}

384

else {

385

differences[i][j] += 0;// maxDiff; }

}

}

}

}

}

}

* normalise the similarity scores (summed over columns) by the

395

* number of visible columns used in the calculation

396

* and fill in the bottom half of the matrix

397

398

// TODO JAL-2424 cpwidth may be out by 1 - affects scores but not tree shape

399

400

for (int i = 0; i < noseqs; i++)

401

{

402

for (int j = i + 1; j < noseqs; j++)

403

{

404

differences[i][j] /= cpwidth;

405

differences[j][i] = differences[i][j];

406

}

407

}

408

return (new Matrix(differences));

}

/**

* Builds and returns a set containing sequences (SeqCigar) which do not have

414

* a gap at the given column position.

415

416

* @param seqs

417

* @param columnPosition

* (0..)

* @return

private Set<SeqCigar> findSeqsWithoutGapAtColumn(SeqCigar[] seqs,

422

int columnPosition)

423

{

424

Set<SeqCigar> seqsWithoutGapAtCol = new HashSet<>();

425

for (SeqCigar seq : seqs)

426

{

427

int spos = seq.findPosition(columnPosition);

if (spos != -1)

{

* position is not a gap

432

433

seqsWithoutGapAtCol.add(seq);

434

}

435

}

436

return seqsWithoutGapAtCol;

437

}

438

439

148

@Override

440

public String getName()

441

{

442

148

return "Annotation";

}

@Override

public String getDescription()

447

{

448

return "Score between annotation ("+(hamming?"hamming":(logIt?"log1p ":"")+(relDiff?"Relative":",Absolute"))+")";

}

/**
 * @return always false
 */
@Override
public boolean isDNA()
{
return false;
}

/**
 * @return always false
 */
@Override
public boolean isProtein()
{
return false;
}

/**
 * @return always true
 */
@Override
public boolean isSecondaryStructure()
{
return true;
}

@Override

public String toString()

471

{

472

return "Score between sequences based on similarity between binary "

473

+ "vectors marking secondary structure displayed at each column";

474

}

475

}

Coverage Report

File ValueAnnotationDistanceModel.java

Coverage histogram

Code metrics

Classes

Class ValueAnnotationDistanceModel

Contributing tests

Contributing tests

Source view