File GffHelperBase.java

Branches:

Statements:

Methods:

Classes:

LOC:

440

NCLOC:

236

Total complexity:

Complexity density:

0.37

Statements/Method:

14.14

Methods/Class:

Average method complexity:

5.29

Classes

Class	Line #	Total Statements	Complexity	Uncovered Elements	TOTAL Coverage	Actions
GffHelperBase	44	99	37	11	92.8%

Class GffHelperBase

Class GffHelperBase	Line # 44	Total Statements 99	Complexity 37	Uncovered Elements 11	TOTAL Coverage 92.8%
constructMappingFromAlign(int,int,int,int,MappingType) : MapList	84	13	3	3	82.4%
trimMapping(int[],int[],int,int) : boolean	137	21	8	0	100%
findSequence(String,AlignmentI,List<SequenceI>,boolean) : SequenceI	212	21	11	4	89.2%
parseNameValuePairs(String,String,char,String) : Map<String, List<String>>	285	20	7	2	93.3%
buildSequenceFeature(String[],Map<String, List<String>>) : SequenceFeature	338	1	1	0	100%
buildSequenceFeature(String[],int,String,Map<String, List<String>>) : SequenceFeature	351	19	5	2	91.3%
getMapping(AlignmentI,SequenceI,SequenceI) : AlignedCodonFrame	429	4	2	0	100%

Contributing tests

This file is covered by 19 tests.

Contributing tests

Test contribution	Test	Result
0.6184211	jalview.io.FeaturesFileTest.simpleGff3FileLoaderjalview.io.FeaturesFileTest.simpleGff3FileLoader	1PASS
0.6184211	jalview.io.FeaturesFileTest.simpleGff3RelaxedIdMatchingjalview.io.FeaturesFileTest.simpleGff3RelaxedIdMatching	1PASS
0.6184211	jalview.io.gff.ExonerateHelperTest.testAddExonerateGffToAlignmentjalview.io.gff.ExonerateHelperTest.testAddExonerateGffToAlignment	1PASS
0.6184211	jalview.io.FeaturesFileTest.readGff3Filejalview.io.FeaturesFileTest.readGff3File	1PASS
0.5986842	jalview.io.FeaturesFileTest.simpleGff3FileClassjalview.io.FeaturesFileTest.simpleGff3FileClass	1PASS
0.57894737	jalview.io.gff.Gff3HelperTest.testProcessCdnaMatch_splicedjalview.io.gff.Gff3HelperTest.testProcessCdnaMatch_spliced	1PASS
0.55263156	jalview.io.gff.Gff3HelperTest.testProcessCdnaMatch_forwardToForwardjalview.io.gff.Gff3HelperTest.testProcessCdnaMatch_forwardToForward	1PASS
0.55263156	jalview.io.gff.Gff3HelperTest.testProcessCdnaMatch_forwardToReversejalview.io.gff.Gff3HelperTest.testProcessCdnaMatch_forwardToReverse	1PASS
0.44078946	jalview.io.gff.GffTests.testResolveExonerateGffjalview.io.gff.GffTests.testResolveExonerateGff	1PASS
0.44078946	jalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_forward_querygffjalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_forward_querygff	1PASS
0.44078946	jalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_forward_targetgffjalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_forward_targetgff	1PASS
0.44078946	jalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_reverse_querygffjalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_reverse_querygff	1PASS
0.44078946	jalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_reverse_targetgffjalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_reverse_targetgff	1PASS
0.4276316	jalview.io.gff.InterProScanHelperTest.testProcessProteinMatchjalview.io.gff.InterProScanHelperTest.testProcessProteinMatch	1PASS
0.30921054	jalview.io.FeaturesFileTest.testParse_mixedJalviewGffjalview.io.FeaturesFileTest.testParse_mixedJalviewGff	1PASS
0.30921054	jalview.io.FeaturesFileTest.testParse_pureGff3jalview.io.FeaturesFileTest.testParse_pureGff3	1PASS
0.21052632	jalview.io.gff.GffHelperBaseTest.testTrimMappingjalview.io.gff.GffHelperBaseTest.testTrimMapping	1PASS
0.19078948	jalview.io.gff.GffHelperBaseTest.testParseNameValuePairsjalview.io.gff.GffHelperBaseTest.testParseNameValuePairs	1PASS
0.1513158	jalview.io.gff.Gff3HelperTest.testProcessCdnaMatch_reverseToForwardjalview.io.gff.Gff3HelperTest.testProcessCdnaMatch_reverseToForward	1PASS

Source view

/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */

package jalview.io.gff;

import jalview.analysis.SequenceIdMatcher;

import jalview.datamodel.AlignedCodonFrame;

import jalview.datamodel.AlignmentI;

import jalview.datamodel.MappingType;

import jalview.datamodel.SequenceDummy;

import jalview.datamodel.SequenceFeature;

import jalview.datamodel.SequenceI;

import jalview.util.MapList;

import jalview.util.StringUtils;

import java.util.ArrayList;

import java.util.Arrays;

import java.util.HashMap;

import java.util.List;

import java.util.Map;

import java.util.Map.Entry;

/**
 * Base class with common functionality for flavours of GFF handler (GFF2 or
 * GFF3)
 */

public abstract class GffHelperBase implements GffHelperI

{

// attribute name whose value is also used as the feature's description
// (see buildSequenceFeature)
private static final String NOTE = "Note";

/*
 * GFF columns 1-9 (zero-indexed):
 */

// index of each GFF column within the String[] of column values
protected static final int SEQID_COL = 0;

protected static final int SOURCE_COL = 1;

protected static final int TYPE_COL = 2;

protected static final int START_COL = 3;

protected static final int END_COL = 4;

protected static final int SCORE_COL = 5;

protected static final int STRAND_COL = 6;

protected static final int PHASE_COL = 7;

protected static final int ATTRIBUTES_COL = 8;

// the alignment for which 'matcher' was last built; findSequence rebuilds
// the matcher when it is called with a different alignment object
private AlignmentI lastmatchedAl = null;

// id matcher used by findSequence when relaxed id matching is requested
private SequenceIdMatcher matcher = null;

/**

* Constructs and returns a mapping, or null if data appear invalid

* @param fromStart

* @param fromEnd

* @param toStart

* @param toEnd

* @param mappingType

* type of mapping (e.g. protein to nucleotide)

* @return

protected MapList constructMappingFromAlign(int fromStart, int fromEnd,

int toStart, int toEnd, MappingType mappingType)

{

int[] from = new int[] { fromStart, fromEnd };

int[] to = new int[] { toStart, toEnd };

* Jalview always models from dna to protein, so switch values if the

* GFF mapping is from protein to dna

if (mappingType == MappingType.PeptideToNucleotide)

{

int[] temp = from;

from = to;

to = temp;

mappingType = mappingType.getInverse();

100

}

101

102

int fromRatio = mappingType.getFromRatio();

103

int toRatio = mappingType.getToRatio();

104

105

106

* sanity check that mapped residue counts match

107

* TODO understand why PASA generates such cases...

108

109

if (!trimMapping(from, to, fromRatio, toRatio))

110

{

111

System.err.println("Ignoring mapping from " + Arrays.toString(from)

112

+ " to " + Arrays.toString(to) + " as counts don't match!");

return null;

}

* If a codon has an intron gap, there will be contiguous 'toRanges';

118

* this is handled for us by the MapList constructor.

119

* (It is not clear that exonerate ever generates this case)

120

121

122

return new MapList(from, to, fromRatio, toRatio);

}

/**

* Checks that the 'from' and 'to' ranges have equivalent lengths. If not,

127

* tries to trim the end of the longer so they do. Returns true if the

128

* mappings could be made equivalent, else false. Note the range array values

129

* may be modified by this method.

* @param from

* @param to

* @param fromRatio

* @param toRatio

* @return

protected static boolean trimMapping(int[] from, int[] to, int fromRatio,

138

int toRatio)

139

{

140

int fromLength = Math.abs(from[1] - from[0]) + 1;

141

int toLength = Math.abs(to[1] - to[0]) + 1;

142

int fromOverlap = fromLength * toRatio - toLength * fromRatio;

143

if (fromOverlap == 0)

{

return true;

}

if (fromOverlap > 0 && fromOverlap % toRatio == 0)

148

{

149

150

* restrict from range to make them match up

151

* it's kind of arbitrary which end we truncate - here it is the end

152

153

System.err.print(

154

"Truncating mapping from " + Arrays.toString(from) + " to ");

155

if (from[1] > from[0])

156

{

157

from[1] -= fromOverlap / toRatio;

}

else

{

from[1] += fromOverlap / toRatio;

162

}

163

System.err.println(Arrays.toString(from));

164

return true;

165

}

166

else if (fromOverlap < 0 && fromOverlap % fromRatio == 0)

167

{

168

fromOverlap = -fromOverlap; // > 0

169

170

* restrict to range to make them match up

171

172

System.err.print(

173

"Truncating mapping to " + Arrays.toString(to) + " to ");

174

if (to[1] > to[0])

175

{

176

to[1] -= fromOverlap / fromRatio;

}

else

{

to[1] += fromOverlap / fromRatio;

181

}

182

System.err.println(Arrays.toString(to));

return true;

}

* Couldn't truncate to an exact match..

return false;

}

/**

* Returns a sequence matching the given id, as follows

194

* <ul>

195

* <li>strict matching is on exact sequence name</li>

196

* <li>relaxed matching allows matching on a token within the sequence name,

197

* or a dbxref</li>

198

* <li>first tries to find a match in the alignment sequences</li>

199

* <li>else tries to find a match in the new sequences already generated while

200

* parsing the features file</li>

201

* <li>else creates a new placeholder sequence, adds it to the new sequences

202

* list, and returns it</li>

* </ul>

* @param seqId

* @param align

* @param newseqs

* @param relaxedIdMatching

* @return

protected SequenceI findSequence(String seqId, AlignmentI align,

213

List<SequenceI> newseqs, boolean relaxedIdMatching)

{

if (seqId == null)

{

return null;

}

SequenceI match = null;

220

if (relaxedIdMatching)

221

{

222

if (lastmatchedAl != align)

223

{

224

lastmatchedAl = align;

225

matcher = new SequenceIdMatcher(align.getSequencesArray());

226

if (newseqs != null)

227

{

228

matcher.addAll(newseqs);

229

}

230

}

231

match = matcher.findIdMatch(seqId);

}

else

{

match = align.findName(seqId, true);

236

if (match == null && newseqs != null)

237

{

238

for (SequenceI m : newseqs)

239

{

240

if (seqId.equals(m.getName()))

{

return m;

}

}

}

}

if (match == null && newseqs != null)

249

{

250

match = new SequenceDummy(seqId);

251

if (relaxedIdMatching)

252

{

253

matcher.addAll(Arrays.asList(new SequenceI[] { match }));

254

}

255

// add dummy sequence to the newseqs list

newseqs.add(match);

}

return match;

}

/**

* Parses the input line to a map of name / value(s) pairs. For example the

263

* line <br>

264

* Notes=Fe-S;Method=manual curation, prediction; source = Pfam; Notes = Metal

265

* <br>

266

* if parsed with delimiter=";" and separators {' ', '='} <br>

267

* would return a map with { Notes={Fe=S, Metal}, Method={manual curation,

268

* prediction}, source={Pfam}} <br>

269

270

* This method supports parsing of either GFF2 format (which uses space ' ' as

271

* the name/value delimiter, and allows multiple occurrences of the same

272

* name), or GFF3 format (which uses '=' as the name/value delimiter, and

273

* strictly does not allow repeat occurrences of the same name - but does

274

* allow a comma-separated list of values).

275

276

* @param text

277

* @param namesDelimiter

278

* the major delimiter between name-value pairs

279

* @param nameValueSeparator

280

* one or more separators used between name and value

281

* @param valuesDelimiter

282

* delimits a list of more than one value

283

* @return the name-values map (which may be empty but never null)

284

285

public static Map<String, List<String>> parseNameValuePairs(String text,

286

String namesDelimiter, char nameValueSeparator,

287

String valuesDelimiter)

288

{

289

Map<String, List<String>> map = new HashMap<String, List<String>>();

290

if (text == null || text.trim().length() == 0)

{

return map;

}

for (String pair : text.trim().split(namesDelimiter))

296

{

297

pair = pair.trim();

298

if (pair.length() == 0)

{

continue;

}

int sepPos = pair.indexOf(nameValueSeparator);

304

if (sepPos == -1)

305

{

306

// no name=value present

continue;

}

String key = pair.substring(0, sepPos).trim();

311

String values = pair.substring(sepPos + 1).trim();

312

if (values.length() > 0)

313

{

314

List<String> vals = map.get(key);

315

if (vals == null)

316

{

317

vals = new ArrayList<String>();

318

map.put(key, vals);

319

}

320

for (String val : values.split(valuesDelimiter))

{

vals.add(val);

}

}

}

return map;

}

/**

* Constructs a SequenceFeature from the GFF column data. Subclasses may wish

331

* to call this method then adjust the SequenceFeature depending on the

332

* particular usage of different tools that generate GFF.

* @param gff

* @param attributes

* @return

protected SequenceFeature buildSequenceFeature(String[] gff,

339

Map<String, List<String>> attributes)

340

{

341

return buildSequenceFeature(gff, TYPE_COL, gff[SOURCE_COL], attributes);

}

/**

* @param gff

* @param typeColumn

* @param group

* @param attributes

* @return

protected SequenceFeature buildSequenceFeature(String[] gff,

352

int typeColumn, String group, Map<String, List<String>> attributes)

{

try

{

int start = Integer.parseInt(gff[START_COL]);

357

int end = Integer.parseInt(gff[END_COL]);

358

359

360

* default 'score' is 0 rather than Float.NaN as the latter currently

361

* disables the 'graduated colour => colour by label' option

float score = 0f;

try

{

score = Float.parseFloat(gff[SCORE_COL]);

367

} catch (NumberFormatException nfe)

368

{

369

// e.g. '.' - leave as zero

370

}

371

372

SequenceFeature sf = new SequenceFeature(gff[typeColumn],

373

gff[SOURCE_COL], start, end, score, group);

374

375

sf.setStrand(gff[STRAND_COL]);

376

377

sf.setPhase(gff[PHASE_COL]);

378

379

if (attributes != null)

380

{

381

382

* save 'raw' column 9 to allow roundtrip output as input

383

384

sf.setAttributes(gff[ATTRIBUTES_COL]);

385

386

387

* Add attributes in column 9 to the sequence feature's

388

* 'otherData' table; use Note as a best proxy for description

389

390

for (Entry<String, List<String>> attr : attributes.entrySet())

391

{

392

String values = StringUtils.listToDelimitedString(attr.getValue(),

393

",");

394

sf.setValue(attr.getKey(), values);

395

if (NOTE.equals(attr.getKey()))

396

{

397

sf.setDescription(values);

}

}

}

return sf;

} catch (NumberFormatException nfe)

404

{

405

System.err.println("Invalid number in gff: " + nfe.getMessage());

return null;

}

}

/**
 * Returns the character used to separate attributes names from values in GFF
 * column 9. This is space for GFF2, '=' for GFF3.
 * 
 * @return
 */

protected abstract char getNameValueSeparator();

417

418

/**

419

* Returns any existing mapping held on the alignment between the given

420

* dataset sequences, or a new one if none found. This is a convenience method

421

* to facilitate processing multiple GFF lines that make up a single 'spliced'

422

* mapping, by extending the first mapping as the others are read.

* @param align

* @param fromSeq

* @param toSeq

* @return

protected AlignedCodonFrame getMapping(AlignmentI align,

430

SequenceI fromSeq, SequenceI toSeq)

431

{

432

AlignedCodonFrame acf = align.getMapping(fromSeq, toSeq);

433

if (acf == null)

434

{

435

acf = new AlignedCodonFrame();

}

return acf;

}

}

jalviewX

File GffHelperBase.java

Coverage histogram

Code metrics

Classes

Class GffHelperBase

Contributing tests

Contributing tests

Source view