File SecondaryStructureDistanceModel.java

Branches:

Statements:

100

Methods:

Classes:

LOC:

385

NCLOC:

239

Total complexity:

Complexity density:

0.44

Statements/Method:

8.33

Methods/Class:

Average method complexity:

3.67

Classes

Class	Line #	Total Statements	Complexity	TOTAL Coverage	Actions
SecondaryStructureDistanceModel	47	100	44	0.65584415 (65.6%)

Class SecondaryStructureDistanceModel

Class SecondaryStructureDistanceModel	Line # 47	Total Statements 100	Complexity 44	TOTAL Coverage 0.65584415 (65.6%)
SecondaryStructureDistanceModel() SecondaryStructureDistanceModel()	6060	0.00	1.01	-1.0 -1.0 -
getInstance(AlignmentViewPanel) : ScoreModelI getInstance(AlignmentViewPanel) : ScoreModelI	6565	8.08	3.03	0.625 0.62562.5%
configureFromAlignmentView(AlignmentViewPanel) : boolean configureFromAlignmentView(AlignmentViewPanel) : boolean	8585	2.02	1.01	1.0 1.0100%
expandSeqData(SequenceI[],AlignmentView,SimilarityParamsI,List<String>,ArrayList<AlignmentAnnotation>) : SequenceI[] expandSeqData(SequenceI[],AlignmentView,SimilarityParamsI,List<String>,ArrayList<AlignmentAnnotation>) : SequenceI[]	9494	30.030	7.07	0.575 0.57557.5%
findDistances(AlignmentView,SimilarityParamsI) : MatrixI findDistances(AlignmentView,SimilarityParamsI) : MatrixI	183183	48.048	24.024	0.6282051 0.628205162.8%
findSeqsWithoutGapAtColumn(SeqCigar[],int) : Set<SeqCigar> findSeqsWithoutGapAtColumn(SeqCigar[],int) : Set<SeqCigar>	331331	6.06	2.02	1.0 1.0100%
getName() : String getName() : String	349349	1.01	1.01	1.0 1.0100%
getDescription() : String getDescription() : String	355355	1.01	1.01	0.0 0.00%
isDNA() : boolean isDNA() : boolean	361361	1.01	1.01	1.0 1.0100%
isProtein() : boolean isProtein() : boolean	367367	1.01	1.01	1.0 1.0100%
isSecondaryStructure() : boolean isSecondaryStructure() : boolean	373373	1.01	1.01	1.0 1.0100%
toString() : String toString() : String	379379	1.01	1.01	0.0 0.00%

Contributing tests

This file is covered by 5 tests.

Contributing tests

Test contribution	Test	Result
0.6168831	jalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withSSUndefinedInBothSeqsjalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withSSUndefinedInBothSeqs	1PASS
0.6168831	jalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withSSUndefinedInEitherOneSeqjalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withSSUndefinedInEitherOneSeq	1PASS
0.6168831	jalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withGapjalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withGap	1PASS
0.051948052	jalview.gui.CalculationChooserTest.testGetApplicableScoreModelsjalview.gui.CalculationChooserTest.testGetApplicableScoreModels	1PASS
0.012987013	jalview.analysis.scoremodels.ScoreModelsTest.testConstructorjalview.analysis.scoremodels.ScoreModelsTest.testConstructor	1PASS

Source view

/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */

package jalview.analysis.scoremodels;

import jalview.analysis.AlignmentUtils;

import jalview.api.AlignmentViewPanel;

import jalview.api.FeatureRenderer;

import jalview.api.analysis.ScoreModelI;

import jalview.api.analysis.SimilarityParamsI;

import jalview.datamodel.AlignmentAnnotation;

import jalview.datamodel.AlignmentView;

import jalview.datamodel.SeqCigar;

import jalview.datamodel.SequenceI;

import jalview.math.Matrix;

import jalview.math.MatrixI;

import jalview.util.Constants;

import jalview.util.MessageManager;

import java.util.ArrayList;

import java.util.HashMap;

import java.util.HashSet;

import java.util.List;

import java.util.Map;

import java.util.Set;

/**
 * This class contains methods to calculate a distance score between the
 * secondary structure annotations of sequences.
 */

public class SecondaryStructureDistanceModel extends DistanceScoreModel

{

// display name of this score model; returned by getName()
private static final String NAME = "Secondary Structure Similarity";

// substitution matrix used to score pairs of secondary structure symbols;
// assigned at the start of each findDistances() call
private ScoreMatrix ssRateMatrix;

// returned by getDescription(); NOTE(review): nothing in this class assigns
// it, so it appears to stay null - confirm whether that is intended
private String description;

// feature renderer cloned from the alignment view in
// configureFromAlignmentView(); expandSeqData() reads the alignment through it
FeatureRenderer fr;

/**
 * Constructor. Creates a model that has not yet been configured from an
 * alignment view (see {@link #configureFromAlignmentView}); getInstance()
 * invokes this constructor reflectively.
 */
public SecondaryStructureDistanceModel()

{

}

@Override

public ScoreModelI getInstance(AlignmentViewPanel view)

{

SecondaryStructureDistanceModel instance;

try

{

instance = this.getClass().getDeclaredConstructor().newInstance();

instance.configureFromAlignmentView(view);

return instance;

} catch (InstantiationException | IllegalAccessException e)

{

jalview.bin.Console.errPrintln("Error in " + getClass().getName()

+ ".getInstance(): " + e.getMessage());

return null;

} catch (ReflectiveOperationException roe)

{

return null;

}

/**
 * Stores a clone of the given view's feature renderer in this model.
 *
 * @param view
 *          the alignment panel whose feature renderer is cloned
 * @return always true
 */
boolean configureFromAlignmentView(AlignmentViewPanel view)
{
  this.fr = view.cloneFeatureRenderer();
  return true;
}

// secondary structure annotation for each (expanded) sequence row, with null
// for rows that have none; rebuilt by expandSeqData() and read by
// findDistances()
ArrayList<AlignmentAnnotation> ssForSeqs = null;

@Override

public SequenceI[] expandSeqData(SequenceI[] sequences,

AlignmentView seqData, SimilarityParamsI scoreParams,

List<String> labels, ArrayList<AlignmentAnnotation> ssAnnotationForSeqs)

{

ssForSeqs = new ArrayList<AlignmentAnnotation>();

100

List<SequenceI> newSequences = new ArrayList<SequenceI>();

101

List<SeqCigar> newCigs = new ArrayList<SeqCigar>();

102

int sq = 0;

103

104

AlignmentAnnotation[] alignAnnotList = fr.getViewport().getAlignment()

105

.getAlignmentAnnotation();

106

107

String ssSource = scoreParams.getSecondaryStructureSource();

108

if (ssSource == null || ssSource == "")

109

{

110

ssSource = Constants.SS_ALL_PROVIDERS;

}

* Add secondary structure annotations that are added to the annotation track

115

* to the map

116

117

Map<SequenceI, ArrayList<AlignmentAnnotation>> ssAlignmentAnnotationForSequences = AlignmentUtils

118

.getSequenceAssociatedAlignmentAnnotations(alignAnnotList,

119

ssSource);

120

121

for (SeqCigar scig : seqData.getSequences())

122

{

123

// get the next sequence that should be bound to this scig: may be null

124

SequenceI alSeq = sequences[sq++];

125

List<AlignmentAnnotation> ssec = ssAlignmentAnnotationForSequences

126

.get(scig.getRefSeq());

if (ssec == null)

{

// not defined

newSequences.add(alSeq);

131

if (alSeq != null)

132

{

133

//labels.add("No Secondary Structure");

134

labels.add(Constants.STRUCTURE_PROVIDERS.get("None"));

135

}

136

SeqCigar newSeqCigar = scig; // new SeqCigar(scig);

137

newCigs.add(newSeqCigar);

ssForSeqs.add(null);

}

else

{

for (int i = 0; i < ssec.size(); i++)

{

if (alSeq != null)

{

String provider = AlignmentUtils.extractSSSourceFromAnnotationDescription(ssec.get(i));

147

//String providerAbbreviation = AlignmentUtils.getProviderKey(provider);

148

labels.add(provider);

149

}

150

// newSequences.add(seq);

151

newSequences.add(alSeq);

152

SeqCigar newSeqCigar = scig; // new SeqCigar(scig);

153

newCigs.add(newSeqCigar);

154

ssForSeqs.add(ssec.get(i));

}

}

}

ssAnnotationForSeqs.addAll(ssForSeqs);

159

seqData.setSequences(newCigs.toArray(new SeqCigar[0]));

160

return newSequences.toArray(new SequenceI[0]);

}

/**

* Calculates distance score [i][j] between each pair of protein sequences

166

* based on their secondary structure annotations (H, E, C). The final score

167

* is normalised by the number of alignment columns processed, providing an

168

* average similarity score.

169

* <p>

170

* The parameters argument can include settings for handling gap-residue

171

* aligned positions and may determine if the score calculation is based on

172

* the longer or shorter sequence in each pair. This can be important for

173

* handling partial alignments or sequences of significantly different

* lengths.

* @param seqData

* The aligned sequence data including secondary structure

178

* annotations.

179

* @param params

180

* Additional parameters for customising the scoring process, such as

181

* gap handling and sequence length consideration.

182

183

@Override

184

public MatrixI findDistances(AlignmentView seqData,

185

SimilarityParamsI params)

186

{

187

if (ssForSeqs == null

188

|| ssForSeqs.size() != seqData.getSequences().length)

189

{

190

// expandSeqData needs to be called to initialise the hash

191

SequenceI[] sequences = new SequenceI[seqData.getSequences().length];

192

// we throw away the new labels in this case..

193

expandSeqData(sequences, seqData, params, new ArrayList<String>(), new ArrayList<AlignmentAnnotation>());

194

}

195

SeqCigar[] seqs = seqData.getSequences();

196

int noseqs = seqs.length; // no of sequences

197

int cpwidth = 0;

198

double[][] similarities = new double[noseqs][noseqs]; // matrix to store

199

// similarity score

200

// secondary structure source parameter selected by the user from the drop

201

// down.

202

String ssSource = params.getSecondaryStructureSource();

203

if (ssSource == null || ssSource == "")

204

{

205

ssSource = Constants.SS_ALL_PROVIDERS;

206

}

207

ssRateMatrix = ScoreModels.getInstance().getSecondaryStructureMatrix();

208

209

// need to get real position for view position

210

int[] viscont = seqData.getVisibleContigs();

211

212

213

* scan each column, compute and add to each similarity[i, j]

214

* the number of secondary structure annotation that seqi

215

* and seqj do not share

216

217

for (int vc = 0; vc < viscont.length; vc += 2)

218

{

219

// Iterates for each column position

220

for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)

221

{

222

cpwidth++; // used to normalise the similarity score

223

224

225

* get set of sequences without gap in the current column

226

227

Set<SeqCigar> seqsWithoutGapAtCol = findSeqsWithoutGapAtColumn(seqs,

cpos);

* calculate similarity score for each secondary structure annotation on i'th and j'th

232

* sequence and add this measure to the similarities matrix

233

* for [i, j] for j > i

234

235

for (int i = 0; i < (noseqs - 1); i++)

236

{

237

AlignmentAnnotation aa_i = ssForSeqs.get(i);

238

boolean undefinedSS1 = aa_i == null;

239

// check if the sequence contains gap in the current column

240

boolean gap1 = !seqsWithoutGapAtCol.contains(seqs[i]);

241

// secondary structure is fetched only if the current column is not

242

// gap for the sequence

243

char ss1 = '*';

244

if (!gap1 && !undefinedSS1)

245

{

246

// fetch the position in sequence for the column and finds the

247

// corresponding secondary structure annotation

248

// TO DO - consider based on priority and displayed

249

int seqPosition_i = seqs[i].findPosition(cpos);

250

if (aa_i != null)

251

ss1 = AlignmentUtils.findSSAnnotationForGivenSeqposition(aa_i,

252

seqPosition_i);

253

}

254

// Iterates for each sequences

255

for (int j = i + 1; j < noseqs; j++)

256

{

257

258

// check if ss is defined

259

AlignmentAnnotation aa_j = ssForSeqs.get(j);

260

boolean undefinedSS2 = aa_j == null;

261

262

// Set similarity to max score if both SS are not defined

263

if (undefinedSS1 && undefinedSS2)

264

{

265

similarities[i][j] += ssRateMatrix.getMaximumScore();

continue;

}

// Set similarity to minimum score if either one SS is not defined

270

else if (undefinedSS1 || undefinedSS2)

271

{

272

similarities[i][j] += ssRateMatrix.getMinimumScore();

continue;

}

boolean gap2 = !seqsWithoutGapAtCol.contains(seqs[j]);

277

278

// Variable to store secondary structure at the current column

279

char ss2 = '*';

280

281

if (!gap2 && !undefinedSS2)

282

{

283

int seqPosition = seqs[j].findPosition(cpos);

284

285

if (aa_j != null)

286

ss2 = AlignmentUtils.findSSAnnotationForGivenSeqposition(

aa_j, seqPosition);

}

if ((!gap1 && !gap2) || params.includeGaps())

291

{

292

// Calculate similarity score based on the substitution matrix

293

double similarityScore = ssRateMatrix.getPairwiseScore(ss1,

294

ss2);

295

similarities[i][j] += similarityScore;

}

}

}

}

}

* normalise the similarity scores (summed over columns) by the

304

* number of visible columns used in the calculation

305

* and fill in the bottom half of the matrix

306

307

// TODO JAL-2424 cpwidth may be out by 1 - affects scores but not tree shape

308

309

for (int i = 0; i < noseqs; i++)

310

{

311

for (int j = i + 1; j < noseqs; j++)

312

{

313

similarities[i][j] /= cpwidth;

314

similarities[j][i] = similarities[i][j];

315

}

316

}

317

return SimilarityScoreModel

318

.similarityToDistance(new Matrix(similarities));

}

/**

* Builds and returns a set containing sequences (SeqCigar) which do not have

324

* a gap at the given column position.

325

326

* @param seqs

327

* @param columnPosition

* (0..)

* @return

private Set<SeqCigar> findSeqsWithoutGapAtColumn(SeqCigar[] seqs,

332

int columnPosition)

333

{

334

Set<SeqCigar> seqsWithoutGapAtCol = new HashSet<>();

335

for (SeqCigar seq : seqs)

336

{

337

int spos = seq.findPosition(columnPosition);

if (spos != -1)

{

* position is not a gap

342

343

seqsWithoutGapAtCol.add(seq);

344

}

345

}

346

return seqsWithoutGapAtCol;

347

}

348

349

167

@Override

350

public String getName()

351

{

352

167

return NAME;

}

/**
 * Returns the description field of this model.
 * NOTE(review): no code in this class assigns the field, so this looks like
 * it always returns null - confirm.
 *
 * @return the stored description (possibly null)
 */
@Override

public String getDescription()

{

return description;

}

/**
 * Reports whether this model is a DNA score model.
 *
 * @return always false
 */
@Override

public boolean isDNA()

{

return false;

}

/**
 * Reports whether this model is a protein score model.
 *
 * @return always false
 */
@Override

public boolean isProtein()

{

return false;

}

/**
 * Reports whether this model is a secondary structure score model.
 *
 * @return always true
 */
@Override

public boolean isSecondaryStructure()

{

return true;

}

@Override

public String toString()

381

{

382

return "Score between sequences based on similarity between binary "

383

+ "vectors marking secondary structure displayed at each column";

384

}

385

}

Coverage Report

File SecondaryStructureDistanceModel.java

Coverage histogram

Code metrics

Classes

Class SecondaryStructureDistanceModel

Contributing tests

Contributing tests

Source view