File ExonerateHelper.java

Branches:

Statements:

Methods:

Classes:

LOC:

375

NCLOC:

202

Total complexity:

Complexity density:

0.42

Statements/Method:

14.67

Methods/Class:

Average method complexity:

6.17

Classes

Class	Line #	Total Statements	Complexity	TOTAL Coverage	Actions
ExonerateHelper	39	88	37	0.8492063 (84.9%)

Class ExonerateHelper

Class ExonerateHelper	Line # 39	Total Statements 88	Complexity 37	TOTAL Coverage 0.8492063 (84.9%)
processGff(SequenceI,String[],AlignmentI,List<SequenceI>,boolean) : SequenceFeature	79	6	2	0.8333333 (83.3%)
processGffSimilarity(Map<String, List<String>>,SequenceI,String[],AlignmentI,List<SequenceI>,boolean) : void	123	31	12	0.8 (80%)
buildMapping(String,MappingType,boolean,boolean,String[]) : MapList	234	34	6	0.88095236 (88.1%)
getMappingType(String) : MappingType	311	6	7	1.0 (100%)
recognises(String[]) : boolean	334	9	9	0.93333334 (93.3%)
buildSequenceFeature(String[],Map<String, List<String>>) : SequenceFeature	365	2	1	0.0 (0%)

Contributing tests

This file is covered by 14 tests.

Contributing tests

Test contribution	Test	Result
0.64285713	jalview.io.gff.GffTests.testResolveExonerateGff	PASS
0.61904764	jalview.io.FeaturesFileTest.simpleGff3RelaxedIdMatching	PASS
0.61904764	jalview.io.FeaturesFileTest.readGff3File	PASS
0.61904764	jalview.io.FeaturesFileTest.simpleGff3FileClass	PASS
0.61904764	jalview.io.gff.ExonerateHelperTest.testAddExonerateGffToAlignment	PASS
0.61904764	jalview.io.FeaturesFileTest.simpleGff3FileLoader	PASS
0.52380955	jalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_forward_querygff	PASS
0.515873	jalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_reverse_querygff	PASS
0.48412699	jalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_forward_targetgff	PASS
0.47619048	jalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_reverse_targetgff	PASS
0.11904762	jalview.io.gff.GffHelperFactoryTest.testGetHelper	PASS
0.08730159	jalview.io.gff.ExonerateHelperTest.testGetMappingType	PASS
0.031746034	jalview.io.FeaturesFileTest.testParse_pureGff3	PASS
0.031746034	jalview.io.FeaturesFileTest.testParse_mixedJalviewGff	PASS

Source view

/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */

package jalview.io.gff;

import java.util.Locale;

import jalview.datamodel.AlignedCodonFrame;

import jalview.datamodel.AlignmentI;

import jalview.datamodel.MappingType;

import jalview.datamodel.SequenceFeature;

import jalview.datamodel.SequenceI;

import jalview.util.MapList;

import java.io.IOException;

import java.util.List;

import java.util.Map;

/**
 * A handler to parse GFF in the format generated by the exonerate tool
 */

public class ExonerateHelper extends Gff2Helper

{

// feature type value that recognises() requires (compared ignoring case)
private static final String SIMILARITY = "similarity";

// exonerate alignment model names; these are searched for as substrings of
// the GFF source column, e.g. exonerate:protein2genome:local
private static final String GENOME2GENOME = "genome2genome";

private static final String CDNA2GENOME = "cdna2genome";

private static final String CODING2GENOME = "coding2genome";

private static final String CODING2CODING = "coding2coding";

private static final String PROTEIN2GENOME = "protein2genome";

private static final String PROTEIN2DNA = "protein2dna";

// keys looked up in the parsed GFF attributes (name/value pairs):
// 'Align' holds the aligned range descriptors, 'Query' or 'Target' holds the
// id of the sequence the feature's sequence is mapped to
private static final String ALIGN = "Align";

private static final String QUERY = "Query";

private static final String TARGET = "Target";

/**

* Process one GFF feature line (as modelled by SequenceFeature)

* @param seq

* the sequence with which this feature is associated

* @param gffColumns

* the sequence feature with ATTRIBUTES property containing any

* additional attributes

* @param align

* the alignment we are adding GFF to

* @param newseqs

* any new sequences referenced by the GFF

* @param relaxedIdMatching

* if true, match word tokens in sequence names

* @return true if the sequence feature should be added to the sequence, else

* false (i.e. it has been processed in another way e.g. to generate a

* mapping)

@Override

public SequenceFeature processGff(SequenceI seq, String[] gffColumns,

AlignmentI align, List<SequenceI> newseqs,

boolean relaxedIdMatching)

{

String attr = gffColumns[ATTRIBUTES_COL];

Map<String, List<String>> set = parseNameValuePairs(attr);

try

{

processGffSimilarity(set, seq, gffColumns, align, newseqs,

relaxedIdMatching);

} catch (IOException ivfe)

{

jalview.bin.Console.errPrintln(ivfe);

}

* return null to indicate we don't want to add a sequence feature for

* similarity (only process it to create mappings)

return null;

}

/**

* Processes the 'Query' (or 'Target') and 'Align' properties associated with

105

* an exonerate GFF similarity feature; these properties define the mapping of

106

* the annotated range to a related sequence.

107

108

* @param set

109

* parsed GFF column 9 key/value(s)

110

* @param seq

111

* the sequence the GFF feature is on

112

* @param gff

113

* the GFF column data

114

* @param align

115

* the alignment the sequence belongs to, where any new mappings

116

* should be added

117

* @param newseqs

118

* a list of new 'virtual sequences' generated while parsing GFF

119

* @param relaxedIdMatching

120

* if true allow fuzzy search for a matching target sequence

121

* @throws IOException

122

123

protected void processGffSimilarity(Map<String, List<String>> set,

124

SequenceI seq, String[] gff, AlignmentI align,

125

List<SequenceI> newseqs, boolean relaxedIdMatching)

throws IOException

{

* exonerate may be run with

130

* --showquerygff - outputs 'features on the query' e.g. (protein2genome)

131

* Target <dnaseqid> ; Align proteinStartPos dnaStartPos proteinCount

132

* --showtargetgff - outputs 'features on the target' e.g. (protein2genome)

133

* Query <proteinseqid> ; Align dnaStartPos proteinStartPos nucleotideCount

134

* where the Align spec may repeat

135

136

// TODO handle coding2coding and similar as well

137

boolean featureIsOnTarget = true;

138

List<String> mapTo = set.get(QUERY);

139

if (mapTo == null)

140

{

141

mapTo = set.get(TARGET);

142

featureIsOnTarget = false;

143

}

144

MappingType type = getMappingType(gff[SOURCE_COL]);

if (type == null)

{

throw new IOException("Sorry, I don't handle " + gff[SOURCE_COL]);

149

}

150

151

if (mapTo == null || mapTo.size() != 1)

152

{

153

throw new IOException(

154

"Expecting exactly one sequence in Query or Target field (got "

+ mapTo + ")");

}

* locate the mapped sequence in the alignment or 'new' (GFF file) sequences;

160

161

SequenceI mappedSequence = findSequence(mapTo.get(0), align, newseqs,

relaxedIdMatching);

* If mapping is from protein to dna, we store it as dna to protein instead

166

167

SequenceI mapFromSequence = seq;

168

SequenceI mapToSequence = mappedSequence;

169

if ((type == MappingType.NucleotideToPeptide && featureIsOnTarget)

170

|| (type == MappingType.PeptideToNucleotide

171

&& !featureIsOnTarget))

172

{

173

mapFromSequence = mappedSequence;

mapToSequence = seq;

}

* Process the Align maps and create mappings.

179

* These may be cdna-genome, cdna-protein, genome-protein.

180

* The mapped sequences may or may not be in the alignment

181

* (they may be included later in the GFF file).

* get any existing mapping for these sequences (or start one),

186

* and add this mapped range

187

188

AlignedCodonFrame acf = getMapping(align, mapFromSequence,

mapToSequence);

* exonerate GFF has the strand of the target in column 7

193

* (differs from GFF3 which has it in the Target descriptor)

194

195

String strand = gff[STRAND_COL];

196

boolean forwardStrand = true;

197

if ("-".equals(strand))

198

{

199

forwardStrand = false;

200

}

201

else if (!"+".equals(strand))

202

{

203

jalview.bin.Console

204

.errPrintln("Strand must be specified for alignment");

return;

}

List<String> alignedRegions = set.get(ALIGN);

209

for (String region : alignedRegions)

210

{

211

MapList mapping = buildMapping(region, type, forwardStrand,

212

featureIsOnTarget, gff);

if (mapping == null)

{

continue;

}

acf.addMap(mapFromSequence, mapToSequence, mapping);

220

}

221

align.addCodonFrame(acf);

}

/**

* Construct the mapping

* @param region

* @param type

* @param forwardStrand

230

* @param featureIsOnTarget

* @param gff

* @return

protected MapList buildMapping(String region, MappingType type,

235

boolean forwardStrand, boolean featureIsOnTarget, String[] gff)

236

{

237

238

* process one "fromStart toStart fromCount" descriptor

239

240

String[] tokens = region.split(" ");

241

if (tokens.length != 3)

242

{

243

jalview.bin.Console

244

.errPrintln("Malformed Align descriptor: " + region);

return null;

}

* get start/end of from/to mappings

250

* if feature is on the target sequence we have to invert the sense

int alignFromStart;

int alignToStart;

int alignCount;

try

{

alignFromStart = Integer.parseInt(tokens[0]);

258

alignToStart = Integer.parseInt(tokens[1]);

259

alignCount = Integer.parseInt(tokens[2]);

260

} catch (NumberFormatException nfe)

261

{

262

jalview.bin.Console.errPrintln(nfe.toString());

return null;

}

int fromStart;

int fromEnd;

int toStart;

int toEnd;

if (featureIsOnTarget)

272

{

273

fromStart = alignToStart;

274

toStart = alignFromStart;

275

toEnd = forwardStrand ? toStart + alignCount - 1

276

: toStart - (alignCount - 1);

277

int toLength = Math.abs(toEnd - toStart) + 1;

278

int fromLength = toLength * type.getFromRatio() / type.getToRatio();

279

fromEnd = fromStart + fromLength - 1;

}

else

{

// we use the 'Align' values here not the feature start/end

284

// not clear why they may differ but it seems they can

285

fromStart = alignFromStart;

286

fromEnd = alignFromStart + alignCount - 1;

287

int fromLength = fromEnd - fromStart + 1;

288

int toLength = fromLength * type.getToRatio() / type.getFromRatio();

289

toStart = alignToStart;

290

if (forwardStrand)

291

{

292

toEnd = toStart + toLength - 1;

}

else

{

toEnd = toStart - (toLength - 1);

}

}

MapList codonmapping = constructMappingFromAlign(fromStart, fromEnd,

301

toStart, toEnd, type);

return codonmapping;

}

/**

* Returns a MappingType depending on the exonerate 'model' value.

* @param model

* @return

protected static MappingType getMappingType(String model)

312

{

313

MappingType result = null;

314

315

if (model.contains(PROTEIN2DNA) || model.contains(PROTEIN2GENOME))

316

{

317

result = MappingType.PeptideToNucleotide;

318

}

319

else if (model.contains(CODING2CODING) || model.contains(CODING2GENOME)

320

|| model.contains(CDNA2GENOME) || model.contains(GENOME2GENOME))

321

{

322

result = MappingType.NucleotideToNucleotide;

}

return result;

}

/**

* Tests whether the GFF data looks like it was generated by exonerate, and is

329

* a format we are willing to handle

* @param columns

* @return

public static boolean recognises(String[] columns)

335

{

336

if (!SIMILARITY.equalsIgnoreCase(columns[TYPE_COL]))

{

return false;

}

* inspect alignment model

343

344

String model = columns[SOURCE_COL];

345

// e.g. exonerate:protein2genome:local

346

if (model != null)

347

{

348

String mdl = model.toLowerCase(Locale.ROOT);

349

if (mdl.contains(PROTEIN2DNA) || mdl.contains(PROTEIN2GENOME)

350

|| mdl.contains(CODING2CODING) || mdl.contains(CODING2GENOME)

351

|| mdl.contains(CDNA2GENOME) || mdl.contains(GENOME2GENOME))

{

return true;

}

}

jalview.bin.Console

.errPrintln("Sorry, I don't handle exonerate model " + model);

return false;

}

/**

* An override to set feature group to "exonerate" instead of the default GFF

363

* source value (column 2)

364

365

@Override

366

protected SequenceFeature buildSequenceFeature(String[] gff,

367

Map<String, List<String>> set)

368

{

369

SequenceFeature sf = super.buildSequenceFeature(gff, TYPE_COL,

"exonerate", set);

return sf;

}

}

Coverage Report

File ExonerateHelper.java

Coverage histogram

Code metrics

Classes

Class ExonerateHelper

Contributing tests

Contributing tests

Source view