File SecondaryStructureDistanceModel.java

Branches:

Statements:

Methods:

Classes:

LOC:

416

NCLOC:

240

Total complexity:

Complexity density:

0.46

Statements/Method:

7.15

Methods/Class:

Average method complexity:

3.31

Classes

Class	Line #	Total Statements	Complexity	TOTAL Coverage	Actions
SecondaryStructureDistanceModel	45	93	43	0.6464%

Class SecondaryStructureDistanceModel

Class SecondaryStructureDistanceModel	Line # 45	Total Statements 93	Complexity 43	TOTAL Coverage 0.6464%
SecondaryStructureDistanceModel() SecondaryStructureDistanceModel()	5858	0.00	1.01	-1.0 -1.0 -
getInstance(AlignmentViewPanel) : ScoreModelI getInstance(AlignmentViewPanel) : ScoreModelI	6363	8.08	3.03	0.625 0.62562.5%
configureFromAlignmentView(AlignmentViewPanel) : boolean configureFromAlignmentView(AlignmentViewPanel) : boolean	8383	2.02	1.01	1.0 1.0100%
findDistances(AlignmentView,SimilarityParamsI) : MatrixI findDistances(AlignmentView,SimilarityParamsI) : MatrixI	109109	57.057	24.024	0.5494506 0.549450654.9%
findSeqsWithoutGapAtColumn(SeqCigar[],int) : Set<SeqCigar> findSeqsWithoutGapAtColumn(SeqCigar[],int) : Set<SeqCigar>	297297	6.06	2.02	1.0 1.0100%
findSeqsWithUndefinedSS(SeqCigar[],Map<String, HashSet<String>>) : Set<SeqCigar> findSeqsWithUndefinedSS(SeqCigar[],Map<String, HashSet<String>>) : Set<SeqCigar>	326326	5.05	2.02	0.85714287 0.8571428785.7%
isSSUndefinedOrNotAdded(SeqCigar,Map<String, HashSet<String>>) : boolean isSSUndefinedOrNotAdded(SeqCigar,Map<String, HashSet<String>>) : boolean	351351	9.09	4.04	0.6666667 0.666666766.7%
getName() : String getName() : String	380380	1.01	1.01	1.0 1.0100%
getDescription() : String getDescription() : String	386386	1.01	1.01	0.0 0.00%
isDNA() : boolean isDNA() : boolean	392392	1.01	1.01	1.0 1.0100%
isProtein() : boolean isProtein() : boolean	398398	1.01	1.01	1.0 1.0100%
isSecondaryStructure() : boolean isSecondaryStructure() : boolean	404404	1.01	1.01	1.0 1.0100%
toString() : String toString() : String	410410	1.01	1.01	0.0 0.00%

Contributing tests

This file is covered by 5 tests.

Contributing tests

Test contribution	Test	Result
0.6	jalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withSSUndefinedInEitherOneSeqjalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withSSUndefinedInEitherOneSeq	1PASS
0.6	jalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withGapjalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withGap	1PASS
0.56666666	jalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withSSUndefinedInBothSeqsjalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withSSUndefinedInBothSeqs	1PASS
0.053333335	jalview.gui.CalculationChooserTest.testGetApplicableScoreModelsjalview.gui.CalculationChooserTest.testGetApplicableScoreModels	1PASS
0.013333334	jalview.analysis.scoremodels.ScoreModelsTest.testConstructorjalview.analysis.scoremodels.ScoreModelsTest.testConstructor	1PASS

Source view

/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */

package jalview.analysis.scoremodels;

import jalview.analysis.AlignmentUtils;

import jalview.api.AlignmentViewPanel;

import jalview.api.FeatureRenderer;

import jalview.api.analysis.ScoreModelI;

import jalview.api.analysis.SimilarityParamsI;

import jalview.datamodel.AlignmentAnnotation;

import jalview.datamodel.AlignmentView;

import jalview.datamodel.Annotation;

import jalview.datamodel.SeqCigar;

import jalview.math.Matrix;

import jalview.math.MatrixI;

import jalview.util.Constants;

import jalview.util.SetUtils;

import java.util.HashMap;

import java.util.HashSet;

import java.util.Map;

import java.util.Set;

/**
 * This class contains methods to calculate a distance score between the
 * secondary structure annotations of sequences.
 */

public class SecondaryStructureDistanceModel extends DistanceScoreModel

{

// Display name of this score model; returned by getName().
private static final String NAME = "Secondary Structure Similarity";

// Substitution matrix used for scoring; (re)assigned at the start of every
// findDistances() call from ScoreModels.getInstance().
private ScoreMatrix ssRateMatrix;

// Returned by getDescription(). NOTE(review): never assigned in the visible
// source, so it presumably stays null - confirm.
private String description;

// Feature renderer cloned from the alignment view panel in
// configureFromAlignmentView(); findDistances() uses it to reach the
// alignment's annotations.
FeatureRenderer fr;

/**
 * Constructor. Creates a model that has no feature renderer attached yet;
 * {@link #getInstance(AlignmentViewPanel)} creates instances reflectively
 * through this constructor and then configures them from a view.
 */
public SecondaryStructureDistanceModel()

{

}

@Override

public ScoreModelI getInstance(AlignmentViewPanel view)

{

SecondaryStructureDistanceModel instance;

try

{

instance = this.getClass().getDeclaredConstructor().newInstance();

instance.configureFromAlignmentView(view);

return instance;

} catch (InstantiationException | IllegalAccessException e)

{

jalview.bin.Console.errPrintln("Error in " + getClass().getName()

+ ".getInstance(): " + e.getMessage());

return null;

} catch (ReflectiveOperationException roe)

{

return null;

}

/**
 * Attaches this model to an alignment view by storing a clone of the view's
 * feature renderer in {@code fr}.
 *
 * @param view
 *          the alignment view panel to take the feature renderer clone from
 * @return always true
 */
boolean configureFromAlignmentView(AlignmentViewPanel view)
{
  final FeatureRenderer clonedRenderer = view.cloneFeatureRenderer();
  this.fr = clonedRenderer;
  return true;
}

/**

* Calculates distance score [i][j] between each pair of protein sequences

* based on their secondary structure annotations (H, E, C). The final score

* is normalised by the number of alignment columns processed, providing an

* average similarity score.

* <p>

* The parameters argument can include settings for handling gap-residue

* aligned positions and may determine if the score calculation is based on

* the longer or shorter sequence in each pair. This can be important for

* handling partial alignments or sequences of significantly different

* lengths.

* @param seqData

* The aligned sequence data including secondary structure

104

* annotations.

105

* @param params

106

* Additional parameters for customising the scoring process, such as

107

* gap handling and sequence length consideration.

108

109

@Override

110

public MatrixI findDistances(AlignmentView seqData,

111

SimilarityParamsI params)

112

{

113

114

SeqCigar[] seqs = seqData.getSequences();

115

int noseqs = seqs.length; // no of sequences

116

int cpwidth = 0; // = seqData.getWidth();

117

double[][] similarities = new double[noseqs][noseqs]; // matrix to store

118

// similarity score

119

// secondary structure source parameter selected by the user from the drop

120

// down.

121

String ssSource = params.getSecondaryStructureSource();

122

ssRateMatrix = ScoreModels.getInstance().getSecondaryStructureMatrix();

123

124

// defining the default value for secondary structure source as 3d

125

// structures

126

// or JPred if user selected JPred

127

String selectedSSSource = Constants.SS_ANNOTATION_LABEL;

128

if (ssSource.equals(Constants.SECONDARY_STRUCTURE_LABELS

129

.get(Constants.SS_ANNOTATION_FROM_JPRED_LABEL)))

130

{

131

selectedSSSource = Constants.SS_ANNOTATION_FROM_JPRED_LABEL;

132

}

133

134

// need to get real position for view position

135

int[] viscont = seqData.getVisibleContigs();

136

137

138

* Add secondary structure annotations that are added to the annotation track

139

* to the map

140

141

Map<String, HashSet<String>> ssAlignmentAnnotationForSequences = new HashMap<String, HashSet<String>>();

142

143

AlignmentAnnotation[] alignAnnotList = fr.getViewport().getAlignment()

144

.getAlignmentAnnotation();

145

146

if (alignAnnotList.length > 0)

147

{

148

for (AlignmentAnnotation aa : alignAnnotList)

149

{

150

if (aa.sequenceRef==null)

{

continue;

}

if (selectedSSSource.equals(aa.label))

155

{

156

ssAlignmentAnnotationForSequences

157

.computeIfAbsent(aa.sequenceRef.getName(),

158

k -> new HashSet<>())

159

.add(aa.description);

}

}

}

* Get the set of sequences which are not considered for the calculation.

166

* Following sequences are added:

167

* 1. Sequences without a defined secondary structure from the selected

168

* source.

169

* 2. Sequences whose secondary structure annotations are not added to

170

* the annotation track

171

172

Set<SeqCigar> seqsWithUndefinedSS = findSeqsWithUndefinedSS(seqs,

173

ssAlignmentAnnotationForSequences);

174

175

176

* scan each column, compute and add to each similarity[i, j]

177

* the number of secondary structure annotation that seqi

178

* and seqj do not share

179

180

for (int vc = 0; vc < viscont.length; vc += 2)

181

{

182

// Iterates for each column position

183

for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)

184

{

185

cpwidth++; // used to normalise the similarity score

186

187

188

* get set of sequences without gap in the current column

189

190

Set<SeqCigar> seqsWithoutGapAtCol = findSeqsWithoutGapAtColumn(seqs,

cpos);

* calculate similarity score for each secondary structure annotation on i'th and j'th

195

* sequence and add this measure to the similarities matrix

196

* for [i, j] for j > i

197

198

for (int i = 0; i < (noseqs - 1); i++)

199

{

200

// Iterates for each sequences

201

for (int j = i + 1; j < noseqs; j++)

202

{

203

SeqCigar sc1 = seqs[i];

204

SeqCigar sc2 = seqs[j];

205

206

// check if ss is defined

207

boolean undefinedSS1 = seqsWithUndefinedSS.contains(sc1);

208

boolean undefinedSS2 = seqsWithUndefinedSS.contains(sc2);

209

210

// Set similarity to max score if both SS are not defined

211

if (undefinedSS1 && undefinedSS2)

212

{

213

similarities[i][j] += ssRateMatrix.getMaximumScore();

continue;

}

// Set similarity to minimum score if either one SS is not defined

218

else if (undefinedSS1 || undefinedSS2)

219

{

220

similarities[i][j] += ssRateMatrix.getMinimumScore();

continue;

}

// check if the sequence contains gap in the current column

225

boolean gap1 = !seqsWithoutGapAtCol.contains(sc1);

226

boolean gap2 = !seqsWithoutGapAtCol.contains(sc2);

227

228

// Variable to store secondary structure at the current column

char ss1 = '*';

char ss2 = '*';

// secondary structure is fetched only if the current column is not

233

// gap for the sequence

234

if (!gap1 && !undefinedSS1)

235

{

236

// fetch the position in sequence for the column and finds the

237

// corresponding secondary structure annotation

238

// TO DO - consider based on priority and displayed

239

int seqPosition = seqs[i].findPosition(cpos);

240

AlignmentAnnotation[] aa = seqs[i].getRefSeq()

241

.getAnnotation(selectedSSSource);

242

if (aa != null)

243

ss1 = AlignmentUtils.findSSAnnotationForGivenSeqposition(

aa[0], seqPosition);

}

if (!gap2 && !undefinedSS2)

248

{

249

int seqPosition = seqs[j].findPosition(cpos);

250

AlignmentAnnotation[] aa = seqs[j].getRefSeq()

251

.getAnnotation(selectedSSSource);

252

if (aa != null)

253

ss2 = AlignmentUtils.findSSAnnotationForGivenSeqposition(

aa[0], seqPosition);

}

if ((!gap1 && !gap2) || params.includeGaps())

258

{

259

// Calculate similarity score based on the substitution matrix

260

double similarityScore = ssRateMatrix.getPairwiseScore(ss1,

261

ss2);

262

similarities[i][j] += similarityScore;

}

}

}

}

}

* normalise the similarity scores (summed over columns) by the

271

* number of visible columns used in the calculation

272

* and fill in the bottom half of the matrix

273

274

// TODO JAL-2424 cpwidth may be out by 1 - affects scores but not tree shape

275

276

for (int i = 0; i < noseqs; i++)

277

{

278

for (int j = i + 1; j < noseqs; j++)

279

{

280

similarities[i][j] /= cpwidth;

281

similarities[j][i] = similarities[i][j];

282

}

283

}

284

return ssRateMatrix.similarityToDistance(new Matrix(similarities));

}

/**

* Builds and returns a set containing sequences (SeqCigar) which do not have

290

* a gap at the given column position.

291

292

* @param seqs

293

* @param columnPosition

* (0..)

* @return

private Set<SeqCigar> findSeqsWithoutGapAtColumn(SeqCigar[] seqs,

298

int columnPosition)

299

{

300

Set<SeqCigar> seqsWithoutGapAtCol = new HashSet<>();

301

for (SeqCigar seq : seqs)

302

{

303

int spos = seq.findPosition(columnPosition);

if (spos != -1)

{

* position is not a gap

308

309

seqsWithoutGapAtCol.add(seq);

310

}

311

}

312

return seqsWithoutGapAtCol;

}

/**

* Builds and returns a set containing sequences (SeqCigar) which are not

317

* considered for the similarity calculation. Following sequences are added:

318

* 1. Sequences without a defined secondary structure from the selected

319

* source. 2. Sequences whose secondary structure annotations are not added to

320

* the annotation track

321

322

* @param seqs

323

* @param ssAlignmentAnnotationForSequences

324

* @return

325

326

private Set<SeqCigar> findSeqsWithUndefinedSS(SeqCigar[] seqs,

327

Map<String, HashSet<String>> ssAlignmentAnnotationForSequences)

328

{

329

Set<SeqCigar> seqsWithUndefinedSS = new HashSet<>();

330

for (SeqCigar seq : seqs)

331

{

332

if (isSSUndefinedOrNotAdded(seq, ssAlignmentAnnotationForSequences))

333

{

334

seqsWithUndefinedSS.add(seq);

335

}

336

}

337

return seqsWithUndefinedSS;

}

/**

* Returns true if a sequence (SeqCigar) should not be considered for the

342

* similarity calculation. Following conditions are checked: 1. Sequence

343

* without a defined secondary structure from the selected source. 2.

344

* Sequences whose secondary structure annotations are not added to the

* annotation track

* @param seq

* @param ssAlignmentAnnotationForSequences

349

* @return

350

351

private boolean isSSUndefinedOrNotAdded(SeqCigar seq,

352

Map<String, HashSet<String>> ssAlignmentAnnotationForSequences)

353

{

354

for (String label : Constants.SECONDARY_STRUCTURE_LABELS.keySet())

355

{

356

AlignmentAnnotation[] annotations = seq.getRefSeq()

357

.getAnnotation(label);

358

if (annotations != null)

359

{

360

for (AlignmentAnnotation annotation : annotations)

361

{

362

HashSet<String> descriptionSet = ssAlignmentAnnotationForSequences

363

.get(annotation.sequenceRef.getName());

364

if (descriptionSet != null)

365

{

366

if (descriptionSet.contains(annotation.description))

367

{

368

// Secondary structure annotation is present and

369

// added to the track, no need to add seq

return false;

}

}

}

}

}

// Either annotations are undefined or not added to the track

return true;

}

175

@Override

381

public String getName()

382

{

383

175

return NAME;

}

/**
 * Returns the value of the {@code description} field.
 * NOTE(review): the field is not assigned anywhere in the visible source, so
 * this presumably returns null - confirm before relying on it.
 *
 * @return the description field, possibly null
 */
@Override

public String getDescription()

{

return description;

}

/**
 * Reports this model as not being a DNA score model.
 *
 * @return always false
 */
@Override

public boolean isDNA()

{

return false;

}

/**
 * Reports this model as not being a protein score model.
 * NOTE(review): presumably the model is selected through
 * isSecondaryStructure() rather than by residue type - confirm against the
 * callers that filter applicable score models.
 *
 * @return always false
 */
@Override

public boolean isProtein()

{

return false;

}

/**
 * Reports this model as a secondary structure score model.
 *
 * @return always true
 */
@Override

public boolean isSecondaryStructure()

{

return true;

}

@Override

public String toString()

412

{

413

return "Score between sequences based on similarity between binary "

414

+ "vectors marking secondary structure displayed at each column";

415

}

416

}

Coverage Report

File SecondaryStructureDistanceModel.java

Coverage histogram

Code metrics

Classes

Class SecondaryStructureDistanceModel

Contributing tests

Contributing tests

Source view