File GffHelperBase.java

Branches:

Statements:

135

Methods:

Classes:

LOC:

571

NCLOC:

309

Total complexity:

Complexity density:

0.39

Statements/Method:

16.88

Methods/Class:

Average method complexity:

6.62

Classes

Class	Line #	Total Statements	Complexity	TOTAL Coverage	Actions
GffHelperBase	44	135	53	0.947867394.8%

Class GffHelperBase

Class GffHelperBase	Line # 44	Total Statements 135	Complexity 53	TOTAL Coverage 0.947867394.8%
constructMappingFromAlign(int,int,int,int,MappingType) : MapList constructMappingFromAlign(int,int,int,int,MappingType) : MapList	9090	13.013	3.03	0.8235294 0.823529482.4%
trimMapping(int[],int[],int,int) : boolean trimMapping(int[],int[],int,int) : boolean	143143	21.021	8.08	1.0 1.0100%
findSequence(String,AlignmentI,List<SequenceI>,boolean) : SequenceI findSequence(String,AlignmentI,List<SequenceI>,boolean) : SequenceI	218218	21.021	11.011	0.8918919 0.891891989.2%
parseNameValuePairs(String,String,char,String) : Map<String, List<String>> parseNameValuePairs(String,String,char,String) : Map<String, List<String>>	296296	23.023	8.08	0.94285715 0.9428571594.3%
buildSequenceFeature(String[],Map<String, List<String>>) : SequenceFeature buildSequenceFeature(String[],Map<String, List<String>>) : SequenceFeature	370370	1.01	1.01	1.0 1.0100%
buildSequenceFeature(String[],int,String,Map<String, List<String>>) : SequenceFeature buildSequenceFeature(String[],int,String,Map<String, List<String>>) : SequenceFeature	383383	24.024	7.07	0.93333334 0.9333333493.3%
parseAttributeMap(String) : Map<String, String> parseAttributeMap(String) : Map<String, String>	467467	28.028	13.013	1.0 1.0100%
getMapping(AlignmentI,SequenceI,SequenceI) : AlignedCodonFrame getMapping(AlignmentI,SequenceI,SequenceI) : AlignedCodonFrame	560560	4.04	2.02	1.0 1.0100%

Contributing tests

This file is covered by 20 tests. .

Contributing tests

Test contribution	Test	Result
0.48341233	jalview.io.FeaturesFileTest.simpleGff3RelaxedIdMatchingjalview.io.FeaturesFileTest.simpleGff3RelaxedIdMatching	1PASS
0.48341233	jalview.io.gff.ExonerateHelperTest.testAddExonerateGffToAlignmentjalview.io.gff.ExonerateHelperTest.testAddExonerateGffToAlignment	1PASS
0.48341233	jalview.io.FeaturesFileTest.readGff3Filejalview.io.FeaturesFileTest.readGff3File	1PASS
0.48341233	jalview.io.FeaturesFileTest.simpleGff3FileLoaderjalview.io.FeaturesFileTest.simpleGff3FileLoader	1PASS
0.46919432	jalview.io.FeaturesFileTest.simpleGff3FileClassjalview.io.FeaturesFileTest.simpleGff3FileClass	1PASS
0.44549763	jalview.io.gff.Gff3HelperTest.testProcessCdnaMatch_splicedjalview.io.gff.Gff3HelperTest.testProcessCdnaMatch_spliced	1PASS
0.43601894	jalview.io.FeaturesFileTest.testParse_pureGff3jalview.io.FeaturesFileTest.testParse_pureGff3	1PASS
0.4265403	jalview.io.gff.Gff3HelperTest.testProcessCdnaMatch_forwardToReversejalview.io.gff.Gff3HelperTest.testProcessCdnaMatch_forwardToReverse	1PASS
0.4265403	jalview.io.gff.Gff3HelperTest.testProcessCdnaMatch_forwardToForwardjalview.io.gff.Gff3HelperTest.testProcessCdnaMatch_forwardToForward	1PASS
0.3364929	jalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_reverse_querygffjalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_reverse_querygff	1PASS
0.3364929	jalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_forward_targetgffjalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_forward_targetgff	1PASS
0.3364929	jalview.io.gff.GffTests.testResolveExonerateGffjalview.io.gff.GffTests.testResolveExonerateGff	1PASS
0.3364929	jalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_forward_querygffjalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_forward_querygff	1PASS
0.3364929	jalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_reverse_targetgffjalview.io.gff.ExonerateHelperTest.testProcessGffSimilarity_protein2dna_reverse_targetgff	1PASS
0.3364929	jalview.io.gff.InterProScanHelperTest.testProcessProteinMatchjalview.io.gff.InterProScanHelperTest.testProcessProteinMatch	1PASS
0.25118482	jalview.io.FeaturesFileTest.testParse_mixedJalviewGffjalview.io.FeaturesFileTest.testParse_mixedJalviewGff	1PASS
0.22274882	jalview.io.gff.GffHelperBaseTest.testParseAttributeMapjalview.io.gff.GffHelperBaseTest.testParseAttributeMap	1PASS
0.16113745	jalview.io.gff.GffHelperBaseTest.testParseNameValuePairsjalview.io.gff.GffHelperBaseTest.testParseNameValuePairs	1PASS
0.15165877	jalview.io.gff.GffHelperBaseTest.testTrimMappingjalview.io.gff.GffHelperBaseTest.testTrimMapping	1PASS
0.11848341	jalview.io.gff.Gff3HelperTest.testProcessCdnaMatch_reverseToForwardjalview.io.gff.Gff3HelperTest.testProcessCdnaMatch_reverseToForward	1PASS

Source view

/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */

package jalview.io.gff;

import jalview.analysis.SequenceIdMatcher;

import jalview.datamodel.AlignedCodonFrame;

import jalview.datamodel.AlignmentI;

import jalview.datamodel.MappingType;

import jalview.datamodel.SequenceDummy;

import jalview.datamodel.SequenceFeature;

import jalview.datamodel.SequenceI;

import jalview.util.MapList;

import jalview.util.StringUtils;

import java.util.ArrayList;

import java.util.Arrays;

import java.util.HashMap;

import java.util.List;

import java.util.Map;

import java.util.Map.Entry;

/**
 * Base class with common functionality for flavours of GFF handler (GFF2 or
 * GFF3)
 */

public abstract class GffHelperBase implements GffHelperI

{

// prefix of the message written to System.err by parseAttributeMap when a
// nested attribute string cannot be parsed
private static final String INVALID_GFF_ATTRIBUTE_FORMAT = "Invalid GFF attribute format: ";

// delimiter used when joining multiple attribute values into one string, and
// between key=value pairs of a nested attribute
protected static final String COMMA = ",";

// separator between key and value within a nested attribute
protected static final String EQUALS = "=";

// attribute name whose value is also used as the feature description
protected static final String NOTE = "Note";

/*
 * GFF columns 1-9 (zero-indexed):
 */

// index of each GFF column within the array of column values for one line
protected static final int SEQID_COL = 0;

protected static final int SOURCE_COL = 1;

protected static final int TYPE_COL = 2;

protected static final int START_COL = 3;

protected static final int END_COL = 4;

protected static final int SCORE_COL = 5;

protected static final int STRAND_COL = 6;

protected static final int PHASE_COL = 7;

protected static final int ATTRIBUTES_COL = 8;

// the alignment for which 'matcher' was last built (see findSequence)
private AlignmentI lastmatchedAl = null;

// id matcher used by findSequence for relaxed id matching; rebuilt when a
// different alignment is supplied
private SequenceIdMatcher matcher = null;

/**

* Constructs and returns a mapping, or null if data appear invalid

* @param fromStart

* @param fromEnd

* @param toStart

* @param toEnd

* @param mappingType

* type of mapping (e.g. protein to nucleotide)

* @return

protected MapList constructMappingFromAlign(int fromStart, int fromEnd,

int toStart, int toEnd, MappingType mappingType)

{

int[] from = new int[] { fromStart, fromEnd };

int[] to = new int[] { toStart, toEnd };

* Jalview always models from dna to protein, so switch values if the

* GFF mapping is from protein to dna

100

if (mappingType == MappingType.PeptideToNucleotide)

{

int[] temp = from;

from = to;

to = temp;

mappingType = mappingType.getInverse();

106

}

107

108

int fromRatio = mappingType.getFromRatio();

109

int toRatio = mappingType.getToRatio();

110

111

112

* sanity check that mapped residue counts match

113

* TODO understand why PASA generates such cases...

114

115

if (!trimMapping(from, to, fromRatio, toRatio))

116

{

117

System.err.println("Ignoring mapping from " + Arrays.toString(from)

118

+ " to " + Arrays.toString(to) + " as counts don't match!");

return null;

}

* If a codon has an intron gap, there will be contiguous 'toRanges';

124

* this is handled for us by the MapList constructor.

125

* (It is not clear that exonerate ever generates this case)

126

127

128

return new MapList(from, to, fromRatio, toRatio);

}

/**

* Checks that the 'from' and 'to' ranges have equivalent lengths. If not,

133

* tries to trim the end of the longer so they do. Returns true if the

134

* mappings could be made equivalent, else false. Note the range array values

135

* may be modified by this method.

* @param from

* @param to

* @param fromRatio

* @param toRatio

* @return

protected static boolean trimMapping(int[] from, int[] to, int fromRatio,

144

int toRatio)

145

{

146

int fromLength = Math.abs(from[1] - from[0]) + 1;

147

int toLength = Math.abs(to[1] - to[0]) + 1;

148

int fromOverlap = fromLength * toRatio - toLength * fromRatio;

149

if (fromOverlap == 0)

{

return true;

}

if (fromOverlap > 0 && fromOverlap % toRatio == 0)

154

{

155

156

* restrict from range to make them match up

157

* it's kind of arbitrary which end we truncate - here it is the end

158

159

System.err.print(

160

"Truncating mapping from " + Arrays.toString(from) + " to ");

161

if (from[1] > from[0])

162

{

163

from[1] -= fromOverlap / toRatio;

}

else

{

from[1] += fromOverlap / toRatio;

168

}

169

System.err.println(Arrays.toString(from));

170

return true;

171

}

172

else if (fromOverlap < 0 && fromOverlap % fromRatio == 0)

173

{

174

fromOverlap = -fromOverlap; // > 0

175

176

* restrict to range to make them match up

177

178

System.err.print(

179

"Truncating mapping to " + Arrays.toString(to) + " to ");

180

if (to[1] > to[0])

181

{

182

to[1] -= fromOverlap / fromRatio;

}

else

{

to[1] += fromOverlap / fromRatio;

187

}

188

System.err.println(Arrays.toString(to));

return true;

}

* Couldn't truncate to an exact match..

return false;

}

/**

* Returns a sequence matching the given id, as follows

200

* <ul>

201

* <li>strict matching is on exact sequence name</li>

202

* <li>relaxed matching allows matching on a token within the sequence name,

203

* or a dbxref</li>

204

* <li>first tries to find a match in the alignment sequences</li>

205

* <li>else tries to find a match in the new sequences already generated while

206

* parsing the features file</li>

207

* <li>else creates a new placeholder sequence, adds it to the new sequences

208

* list, and returns it</li>

* </ul>

* @param seqId

* @param align

* @param newseqs

* @param relaxedIdMatching

* @return

protected SequenceI findSequence(String seqId, AlignmentI align,

219

List<SequenceI> newseqs, boolean relaxedIdMatching)

{

if (seqId == null)

{

return null;

}

SequenceI match = null;

226

if (relaxedIdMatching)

227

{

228

if (lastmatchedAl != align)

229

{

230

lastmatchedAl = align;

231

matcher = new SequenceIdMatcher(align.getSequencesArray());

232

if (newseqs != null)

233

{

234

matcher.addAll(newseqs);

235

}

236

}

237

match = matcher.findIdMatch(seqId);

}

else

{

match = align.findName(seqId, true);

242

if (match == null && newseqs != null)

243

{

244

for (SequenceI m : newseqs)

245

{

246

if (seqId.equals(m.getName()))

{

return m;

}

}

}

}

if (match == null && newseqs != null)

255

{

256

match = new SequenceDummy(seqId);

257

if (relaxedIdMatching)

258

{

259

matcher.addAll(Arrays.asList(new SequenceI[] { match }));

260

}

261

// add dummy sequence to the newseqs list

newseqs.add(match);

}

return match;

}

/**

* Parses the input line to a map of name / value(s) pairs. For example the

* line

* <pre>

* Notes=Fe-S;Method=manual curation, prediction; source = Pfam; Notes = Metal

273

* </pre>

274

275

* if parsed with delimiter=";" and separators {' ', '='} <br>

276

* would return a map with { Notes={Fe=S, Metal}, Method={manual curation,

277

* prediction}, source={Pfam}} <br>

278

279

* This method supports parsing of either GFF2 format (which uses space ' ' as

280

* the name/value delimiter, and allows multiple occurrences of the same

281

* name), or GFF3 format (which uses '=' as the name/value delimiter, and

282

* strictly does not allow repeat occurrences of the same name - but does

283

* allow a comma-separated list of values).

284

* <p>

285

* Returns a (possibly empty) map of lists of values by attribute name.

286

287

* @param text

288

* @param namesDelimiter

289

* the major delimiter between name-value pairs

290

* @param nameValueSeparator

291

* separator used between name and value

292

* @param valuesDelimiter

293

* delimits a list of more than one value

294

* @return

295

296

public static Map<String, List<String>> parseNameValuePairs(String text,

297

String namesDelimiter, char nameValueSeparator,

298

String valuesDelimiter)

299

{

300

Map<String, List<String>> map = new HashMap<>();

301

if (text == null || text.trim().length() == 0)

{

return map;

}

* split by major delimiter (; for GFF3)

308

309

for (String nameValuePair : text.trim().split(namesDelimiter))

310

{

311

nameValuePair = nameValuePair.trim();

312

if (nameValuePair.length() == 0)

{

continue;

}

* find name/value separator (= for GFF3)

319

320

int sepPos = nameValuePair.indexOf(nameValueSeparator);

321

if (sepPos == -1)

322

{

323

// no name=value found

continue;

}

String name = nameValuePair.substring(0, sepPos).trim();

328

String values = nameValuePair.substring(sepPos + 1).trim();

329

if (values.isEmpty())

{

continue;

}

List<String> vals = map.get(name);

335

if (vals == null)

336

{

337

vals = new ArrayList<>();

map.put(name, vals);

}

* if 'values' contains more name/value separators, parse as a map

343

* (nested sub-attribute values)

344

345

if (values.indexOf(nameValueSeparator) != -1)

{

vals.add(values);

}

else

{

for (String val : values.split(valuesDelimiter))

{

vals.add(val);

}

}

}

return map;

}

/**

* Constructs a SequenceFeature from the GFF column data. Subclasses may wish

363

* to call this method then adjust the SequenceFeature depending on the

364

* particular usage of different tools that generate GFF.

* @param gff

* @param attributes

* @return

protected SequenceFeature buildSequenceFeature(String[] gff,

371

Map<String, List<String>> attributes)

372

{

373

return buildSequenceFeature(gff, TYPE_COL, gff[SOURCE_COL], attributes);

}

/**

* @param gff

* @param typeColumn

* @param group

* @param attributes

* @return

protected SequenceFeature buildSequenceFeature(String[] gff,

384

int typeColumn, String group, Map<String, List<String>> attributes)

{

try

{

int start = Integer.parseInt(gff[START_COL]);

389

int end = Integer.parseInt(gff[END_COL]);

390

391

392

* default 'score' is 0 rather than Float.NaN - see JAL-2554

float score = 0f;

try

{

score = Float.parseFloat(gff[SCORE_COL]);

398

} catch (NumberFormatException nfe)

399

{

400

// e.g. '.' - leave as zero

401

}

402

403

SequenceFeature sf = new SequenceFeature(gff[typeColumn],

404

gff[SOURCE_COL], start, end, score, group);

405

406

sf.setStrand(gff[STRAND_COL]);

407

408

sf.setPhase(gff[PHASE_COL]);

409

410

if (attributes != null)

411

{

412

413

* Add attributes in column 9 to the sequence feature's

414

* 'otherData' table; use Note as a best proxy for description;

415

* decode any encoded comma, equals, semi-colon as per GFF3 spec

416

417

for (Entry<String, List<String>> attr : attributes.entrySet())

418

{

419

String key = attr.getKey();

420

List<String> values = attr.getValue();

421

if (values.size() == 1 && values.get(0).contains(EQUALS))

422

{

423

424

* 'value' is actually nested subattributes as x=a,y=b,z=c

425

426

Map<String, String> valueMap = parseAttributeMap(values.get(0));

427

sf.setValue(key, valueMap);

}

else

{

String csvValues = StringUtils.listToDelimitedString(values,

432

COMMA);

433

csvValues = StringUtils.urlDecode(csvValues, GFF_ENCODABLE);

434

sf.setValue(key, csvValues);

435

if (NOTE.equals(key))

436

{

437

sf.setDescription(csvValues);

}

}

}

}

return sf;

} catch (NumberFormatException nfe)

445

{

446

System.err.println("Invalid number in gff: " + nfe.getMessage());

return null;

}

}

/**

* Parses a (GFF3 format) list of comma-separated key=value pairs into a Map

453

* of {@code key,

454

* value} <br>

455

* An input string like {@code a=b,c,d=e,f=g,h} is parsed to

* <pre>

* a = "b,c"

* d = "e"

* f = "g,h"

* </pre>

* @param s

* @return

protected static Map<String, String> parseAttributeMap(String s)

468

{

469

Map<String, String> map = new HashMap<>();

470

String[] fields = s.split(EQUALS);

* format validation

boolean valid = true;

476

if (fields.length < 2)

477

{

478

479

* need at least A=B here

valid = false;

}

else if (fields[0].isEmpty() || fields[0].contains(COMMA))

484

{

485

486

* A,B=C is not a valid start, nor is =C

valid = false;

}

else

{

for (int i = 1; i < fields.length - 1; i++)

493

{

494

if (fields[i].isEmpty() || !fields[i].contains(COMMA))

495

{

496

497

* intermediate tokens must include value,name

valid = false;

}

}

}

if (!valid)

{

System.err.println(INVALID_GFF_ATTRIBUTE_FORMAT + s);

return map;

}

int i = 0;

while (i < fields.length - 1)

512

{

513

boolean lastPair = i == fields.length - 2;

514

String before = fields[i];

515

String after = fields[i + 1];

516

517

518

* if 'key' looks like a,b,c then the last token is the

519

* key

520

521

String theKey = before.contains(COMMA)

522

? before.substring(before.lastIndexOf(COMMA) + 1)

523

: before;

524

525

theKey = theKey.trim();

526

if (theKey.isEmpty())

527

{

528

System.err.println(INVALID_GFF_ATTRIBUTE_FORMAT + s);

map.clear();

return map;

}

* if 'value' looks like a,b,c then all but the last token is the value,

535

* unless this is the last field (no more = to follow), in which case

536

* all of it makes up the value

537

538

String theValue = after.contains(COMMA) && !lastPair

539

? after.substring(0, after.lastIndexOf(COMMA))

540

: after;

541

map.put(StringUtils.urlDecode(theKey, GFF_ENCODABLE),

542

StringUtils.urlDecode(theValue, GFF_ENCODABLE));

i += 1;

}

return map;

}

/**

* Returns any existing mapping held on the alignment between the given

551

* dataset sequences, or a new one if none found. This is a convenience method

552

* to facilitate processing multiple GFF lines that make up a single 'spliced'

553

* mapping, by extending the first mapping as the others are read.

* @param align

* @param fromSeq

* @param toSeq

* @return

protected AlignedCodonFrame getMapping(AlignmentI align,

561

SequenceI fromSeq, SequenceI toSeq)

562

{

563

AlignedCodonFrame acf = align.getMapping(fromSeq, toSeq);

564

if (acf == null)

565

{

566

acf = new AlignedCodonFrame();

}

return acf;

}

}

Coverage Report

File GffHelperBase.java

Coverage histogram

Code metrics

Classes

Class GffHelperBase

Contributing tests

Contributing tests

Source view