File AlignmentUtilsTests.java

Branches:

Statements:

1,281

Methods:

Classes:

LOC:

2,948

NCLOC:

1,905

Total complexity:

Complexity density:

0.05

Statements/Method:

23.72

Methods/Class:

Average method complexity:

1.13

Classes

Class	Line #	Total Statements	Complexity	TOTAL Coverage	Actions
AlignmentUtilsTests	76	1,281	61	0.934766593.5%

Class AlignmentUtilsTests

Class AlignmentUtilsTests	Line # 76	Total Statements 1,281	Complexity 61	TOTAL Coverage 0.934766593.5%
setUpJvOptionPane() : void setUpJvOptionPane() : void	8181	11.011	1.01	1.0 1.0100%
testExpandContext() : void testExpandContext() : void	111111	20.020	5.05	1.0 1.0100%
testExpandContext_annotation() : void testExpandContext_annotation() : void	165165	43.043	1.01	1.0 1.0100%
testGetSequencesByName() : void testGetSequencesByName() : void	254254	9.09	1.01	1.0 1.0100%
loadAlignment(String,FileFormatI) : AlignmentI loadAlignment(String,FileFormatI) : AlignmentI	279279	3.03	1.01	1.0 1.0100%
testMapProteinAlignmentToCdna_noXrefs() : void testMapProteinAlignmentToCdna_noXrefs() : void	295295	37.037	1.01	1.0 1.0100%
testAlignSequenceAs_withMapping_noIntrons() : void testAlignSequenceAs_withMapping_noIntrons() : void	360360	6.06	1.01	1.0 1.0100%
testAlignSequenceAs_withMapping_withIntrons() : void testAlignSequenceAs_withMapping_withIntrons() : void	404404	7.07	1.01	1.0 1.0100%
testAlignSequenceAs_withMapping_withUnmappedProtein() : void testAlignSequenceAs_withMapping_withUnmappedProtein() : void	454454	2.02	1.01	1.0 1.0100%
checkAlignSequenceAs(String,String,boolean,boolean,MapList,String) : void checkAlignSequenceAs(String,String,boolean,boolean,MapList,String) : void	483483	8.08	1.01	1.0 1.0100%
testAlignSequenceAs_keepIntronGapsOnly() : void testAlignSequenceAs_keepIntronGapsOnly() : void	504504	2.02	1.01	1.0 1.0100%
testAlignProteinAsDna() : void testAlignProteinAsDna() : void	521521	24.024	1.01	1.0 1.0100%
testTranslatesAs() : void testTranslatesAs() : void	568568	20.020	1.01	1.0 1.0100%
testMapProteinAlignmentToCdna_withStartAndStopCodons() : void testMapProteinAlignmentToCdna_withStartAndStopCodons() : void	643643	55.055	1.01	1.0 1.0100%
testMapProteinAlignmentToCdna_withXrefs() : void testMapProteinAlignmentToCdna_withXrefs() : void	742742	37.037	1.01	1.0 1.0100%
testMapProteinAlignmentToCdna_prioritiseXrefs() : void testMapProteinAlignmentToCdna_prioritiseXrefs() : void	818818	23.023	1.01	1.0 1.0100%
testShowOrHideSequenceAnnotations() : void testShowOrHideSequenceAnnotations() : void	869869	71.071	1.01	1.0 1.0100%
testHasCrossRef() : void testHasCrossRef() : void	979979	13.013	1.01	1.0 1.0100%
testHaveCrossRef() : void testHaveCrossRef() : void	10081008	16.016	1.01	1.0 1.0100%
testMakeCdsAlignment() : void testMakeCdsAlignment() : void	10381038	124.0124	1.01	1.0 1.0100%
testMakeCdsAlignment_multipleProteins() : void testMakeCdsAlignment_multipleProteins() : void	12871287	68.068	1.01	1.0 1.0100%
testIsMappable() : void testIsMappable() : void	14641464	11.011	1.01	1.0 1.0100%
testMapCdnaToProtein_forSubsequence() : void testMapCdnaToProtein_forSubsequence() : void	14881488	9.09	1.01	1.0 1.0100%
testAlignSequenceAs_mappedProteinProtein() : void testAlignSequenceAs_mappedProteinProtein() : void	15071507	9.09	1.01	1.0 1.0100%
testAlignSequenceAs_withTrailingPeptide() : void testAlignSequenceAs_withTrailingPeptide() : void	15321532	2.02	1.01	1.0 1.0100%
testTransferFeatures() : void testTransferFeatures() : void	15451545	43.043	1.01	1.0 1.0100%
testTransferFeatures_withOmit() : void testTransferFeatures_withOmit() : void	16291629	13.013	1.01	1.0 1.0100%
testTransferFeatures_withSelect() : void testTransferFeatures_withSelect() : void	16631663	13.013	1.01	1.0 1.0100%
testMakeCdsAlignment_alternativeTranscripts() : void testMakeCdsAlignment_alternativeTranscripts() : void	16981698	70.070	1.01	1.0 1.0100%
testAlignProteinAsDna_incompleteStartCodon() : void testAlignProteinAsDna_incompleteStartCodon() : void	18191819	24.024	1.01	1.0 1.0100%
testFindCdsPositions_fivePrimeIncomplete() : void testFindCdsPositions_fivePrimeIncomplete() : void	18731873	15.015	1.01	1.0 1.0100%
testFindCdsPositions() : void testFindCdsPositions() : void	19051905	18.018	1.01	1.0 1.0100%
testFindCdsPositions_reverseStrand() : void testFindCdsPositions_reverseStrand() : void	19451945	18.018	1.01	0.0 0.00%
testFindCdsPositions_reverseStrandThreePrimeIncomplete() : void testFindCdsPositions_reverseStrandThreePrimeIncomplete() : void	19811981	17.017	1.01	0.0 0.00%
testAlignAs_alternateTranscriptsUngapped() : void testAlignAs_alternateTranscriptsUngapped() : void	20142014	17.017	1.01	1.0 1.0100%
testAddMappedPositions() : void testAddMappedPositions() : void	20472047	14.014	1.01	1.0 1.0100%
testAddMappedPositions_withStopCodon() : void testAddMappedPositions_withStopCodon() : void	20802080	14.014	1.01	1.0 1.0100%
testMakeCdsAlignment_filterProducts() : void testMakeCdsAlignment_filterProducts() : void	21112111	70.070	1.01	1.0 1.0100%
testAlignAsSameSequences() : void testAlignAsSameSequences() : void	22462246	29.029	1.01	1.0 1.0100%
testAlignAsSameSequencesMultipleSubSeq() : void testAlignAsSameSequencesMultipleSubSeq() : void	22962296	22.022	1.01	1.0 1.0100%
testTransferGeneLoci() : void testTransferGeneLoci() : void	23312331	33.033	1.01	1.0 1.0100%
testMapCdsToProtein() : void testMapCdsToProtein() : void	23982398	50.050	1.01	1.0 1.0100%
testFindCdsForProtein() : void testFindCdsForProtein() : void	24922492	25.025	1.01	1.0 1.0100%
testFindCdsForProtein_noUTR() : void testFindCdsForProtein_noUTR() : void	25622562	28.028	1.01	1.0 1.0100%
testAddReferenceAnnotations() : void testAddReferenceAnnotations() : void	26362636	31.031	2.02	1.0 1.0100%
verifyExpectedSequenceAnnotation(AlignmentAnnotation) : void verifyExpectedSequenceAnnotation(AlignmentAnnotation) : void	27092709	5.05	3.03	0.7777778 0.777777877.8%
testAddReferenceContactMap() : void testAddReferenceContactMap() : void	27282728	28.028	1.01	1.0 1.0100%
testSecondaryStructurePresentAndSources(AlignmentAnnotation[],boolean,ArrayList<String>) : void testSecondaryStructurePresentAndSources(AlignmentAnnotation[],boolean,ArrayList<String>) : void	27832783	1.01	1.01	1.0 1.0100%
provideSecondaryStructureAnnotations() : Object[][] provideSecondaryStructureAnnotations() : Object[][]	27942794	9.09	1.01	1.0 1.0100%
testSecondaryStructureAnnotationColour(char,Color) : void testSecondaryStructureAnnotationColour(char,Color) : void	28262826	2.02	1.01	0.0 0.00%
provideSecondaryStructureAnnotationColours() : Object[][] provideSecondaryStructureAnnotationColours() : Object[][]	28352835	1.01	1.01	0.0 0.00%
testIsSSAnnotationPresent(Map<SequenceI, List<AlignmentAnnotation>>,boolean) : void testIsSSAnnotationPresent(Map<SequenceI, List<AlignmentAnnotation>>,boolean) : void	28432843	2.02	1.01	0.0 0.00%
provideSSAnnotationPresence() : Object[][] provideSSAnnotationPresence() : Object[][]	28532853	17.017	1.01	0.0 0.00%
provideSSSourceFromAnnotationDescription() : Object[][] provideSSSourceFromAnnotationDescription() : Object[][]	28942894	22.022	1.01	0.0 0.00%

Contributing tests

This file is covered by 45 tests.

Contributing tests

Test contribution	Test	Result
0.09266123	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignmentjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment	1PASS
0.05263158	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_alternativeTranscriptsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_alternativeTranscripts	1PASS
0.053372867	jalview.analysis.AlignmentUtilsTests.testShowOrHideSequenceAnnotationsjalview.analysis.AlignmentUtilsTests.testShowOrHideSequenceAnnotations	1PASS
0.05263158	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_filterProductsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_filterProducts	1PASS
0.051149	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_multipleProteinsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_multipleProteins	1PASS
0.041512232	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withStartAndStopCodonsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withStartAndStopCodons	1PASS
0.03780578	jalview.analysis.AlignmentUtilsTests.testMapCdsToProteinjalview.analysis.AlignmentUtilsTests.testMapCdsToProtein	1PASS
0.032616753	jalview.analysis.AlignmentUtilsTests.testExpandContext_annotationjalview.analysis.AlignmentUtilsTests.testExpandContext_annotation	1PASS
0.032616753	jalview.analysis.AlignmentUtilsTests.testTransferFeaturesjalview.analysis.AlignmentUtilsTests.testTransferFeatures	1PASS
0.031134173	jalview.analysis.AlignmentUtilsTests.testAddReferenceAnnotationsjalview.analysis.AlignmentUtilsTests.testAddReferenceAnnotations	1PASS
0.028169014	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withXrefs	1PASS
0.028169014	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_noXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_noXrefs	1PASS
0.025203854	jalview.analysis.AlignmentUtilsTests.testTransferGeneLocijalview.analysis.AlignmentUtilsTests.testTransferGeneLoci	1PASS
0.021497406	jalview.analysis.AlignmentUtilsTests.testExpandContextjalview.analysis.AlignmentUtilsTests.testExpandContext	1PASS
0.021497406	jalview.analysis.AlignmentUtilsTests.testFindCdsForProtein_noUTRjalview.analysis.AlignmentUtilsTests.testFindCdsForProtein_noUTR	1PASS
0.022238696	jalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesjalview.analysis.AlignmentUtilsTests.testAlignAsSameSequences	1PASS
0.021497406	jalview.analysis.AlignmentUtilsTests.testAddReferenceContactMapjalview.analysis.AlignmentUtilsTests.testAddReferenceContactMap	1PASS
0.018532246	jalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna_incompleteStartCodonjalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna_incompleteStartCodon	1PASS
0.018532246	jalview.analysis.AlignmentUtilsTests.testAlignProteinAsDnajalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna	1PASS
0.019273536	jalview.analysis.AlignmentUtilsTests.testFindCdsForProteinjalview.analysis.AlignmentUtilsTests.testFindCdsForProtein	1PASS
0.017790956	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_prioritiseXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_prioritiseXrefs	1PASS
0.017049666	jalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesMultipleSubSeqjalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesMultipleSubSeq	1PASS
0.015567087	jalview.analysis.AlignmentUtilsTests.testTranslatesAsjalview.analysis.AlignmentUtilsTests.testTranslatesAs	1PASS
0.014084507	jalview.analysis.AlignmentUtilsTests.testFindCdsPositionsjalview.analysis.AlignmentUtilsTests.testFindCdsPositions	1PASS
0.013343217	jalview.analysis.AlignmentUtilsTests.testAlignAs_alternateTranscriptsUngappedjalview.analysis.AlignmentUtilsTests.testAlignAs_alternateTranscriptsUngapped	1PASS
0.011860638	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_noIntronsjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_noIntrons	1PASS
0.012601927	jalview.analysis.AlignmentUtilsTests.testHaveCrossRefjalview.analysis.AlignmentUtilsTests.testHaveCrossRef	1PASS
0.011860638	jalview.analysis.AlignmentUtilsTests.testFindCdsPositions_fivePrimeIncompletejalview.analysis.AlignmentUtilsTests.testFindCdsPositions_fivePrimeIncomplete	1PASS
0.012601927	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withIntronsjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withIntrons	1PASS
0.010378058	jalview.analysis.AlignmentUtilsTests.testTransferFeatures_withSelectjalview.analysis.AlignmentUtilsTests.testTransferFeatures_withSelect	1PASS
0.010378058	jalview.analysis.AlignmentUtilsTests.testTransferFeatures_withOmitjalview.analysis.AlignmentUtilsTests.testTransferFeatures_withOmit	1PASS
0.010378058	jalview.analysis.AlignmentUtilsTests.testGetSequencesByNamejalview.analysis.AlignmentUtilsTests.testGetSequencesByName	1PASS
0.011119348	jalview.analysis.AlignmentUtilsTests.testAddMappedPositionsjalview.analysis.AlignmentUtilsTests.testAddMappedPositions	1PASS
0.011119348	jalview.analysis.AlignmentUtilsTests.testAddMappedPositions_withStopCodonjalview.analysis.AlignmentUtilsTests.testAddMappedPositions_withStopCodon	1PASS
0.010378058	jalview.analysis.AlignmentUtilsTests.testHasCrossRefjalview.analysis.AlignmentUtilsTests.testHasCrossRef	1PASS
0.008895478	jalview.analysis.AlignmentUtilsTests.testIsMappablejalview.analysis.AlignmentUtilsTests.testIsMappable	1PASS
0.008895478	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withTrailingPeptidejalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withTrailingPeptide	1PASS
0.008895478	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_keepIntronGapsOnlyjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_keepIntronGapsOnly	1PASS
0.008895478	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withUnmappedProteinjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withUnmappedProtein	1PASS
0.0074128984	jalview.analysis.AlignmentUtilsTests.testMapCdnaToProtein_forSubsequencejalview.analysis.AlignmentUtilsTests.testMapCdnaToProtein_forSubsequence	1PASS
0.0074128984	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_mappedProteinProteinjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_mappedProteinProtein	1PASS
0.0014825797	jalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSourcesjalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSources	1PASS
0.0014825797	jalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSourcesjalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSources	1PASS
0.0014825797	jalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSourcesjalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSources	1PASS
0.0014825797	jalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSourcesjalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSources	1PASS

Source view

/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */

package jalview.analysis;

import static org.testng.Assert.assertNotEquals;

import static org.testng.AssertJUnit.assertEquals;

import static org.testng.AssertJUnit.assertFalse;

import static org.testng.AssertJUnit.assertNotNull;

import static org.testng.AssertJUnit.assertNull;

import static org.testng.AssertJUnit.assertSame;

import static org.testng.AssertJUnit.assertTrue;

import java.awt.Color;

import java.io.IOException;

import java.util.ArrayList;

import java.util.Arrays;

import java.util.HashMap;

import java.util.LinkedHashMap;

import java.util.List;

import java.util.Map;

import java.util.Set;

import java.util.SortedMap;

import java.util.TreeMap;

import org.testng.Assert;

import org.testng.annotations.BeforeClass;

import org.testng.annotations.DataProvider;

import org.testng.annotations.Test;

import jalview.datamodel.AlignedCodonFrame;

import jalview.datamodel.Alignment;

import jalview.datamodel.AlignmentAnnotation;

import jalview.datamodel.AlignmentI;

import jalview.datamodel.Annotation;

import jalview.datamodel.ContactListI;

import jalview.datamodel.ContactMatrixI;

import jalview.datamodel.DBRefEntry;

import jalview.datamodel.GeneLociI;

import jalview.datamodel.Mapping;

import jalview.datamodel.SearchResultMatchI;

import jalview.datamodel.SearchResultsI;

import jalview.datamodel.SeqDistanceContactMatrix;

import jalview.datamodel.Sequence;

import jalview.datamodel.SequenceFeature;

import jalview.datamodel.SequenceGroup;

import jalview.datamodel.SequenceI;

import jalview.gui.JvOptionPane;

import jalview.io.AppletFormatAdapter;

import jalview.io.DataSourceType;

import jalview.io.FileFormat;

import jalview.io.FileFormatI;

import jalview.io.FormatAdapter;

import jalview.io.gff.SequenceOntologyI;

import jalview.util.Comparison;

import jalview.util.MapList;

import jalview.util.MappingUtils;

public class AlignmentUtilsTests

{

private static Sequence ts = new Sequence("short",

"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm");

@BeforeClass(alwaysRun = true)

public void setUpJvOptionPane()

{

JvOptionPane.setInteractiveMode(false);

JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);

AlignmentAnnotation ann1 = new AlignmentAnnotation(

"Secondary Structure", "Secondary Structure",

new Annotation[] {});

AlignmentAnnotation ann2 = new AlignmentAnnotation("jnetpred",

"jnetpred", new Annotation[] {});

AlignmentAnnotation ann3 = new AlignmentAnnotation("Temp", "Temp",

new Annotation[] {});

AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "Temp",

new Annotation[] {});

AlignmentAnnotation[] anns1 = new AlignmentAnnotation[] { ann1, ann3,

ann4 };

100

AlignmentAnnotation[] anns2 = new AlignmentAnnotation[] { ann2, ann3,

101

ann4 };

102

103

AlignmentAnnotation[] anns3 = new AlignmentAnnotation[] { ann3, ann4 };

104

105

AlignmentAnnotation[] anns4 = new AlignmentAnnotation[0];

106

107

AlignmentAnnotation[] anns5 = new AlignmentAnnotation[] { ann1, ann2,

ann3, ann4 };

}

@Test(groups = { "Functional" })

112

public void testExpandContext()

113

{

114

AlignmentI al = new Alignment(new Sequence[] {});

115

for (int i = 4; i < 14; i += 2)

116

{

117

SequenceI s1 = ts.deriveSequence().getSubSequence(i, i + 7);

118

al.addSequence(s1);

119

}

120

System.out.println(new AppletFormatAdapter()

121

.formatSequences(FileFormat.Clustal, al, true));

122

for (int flnk = -1; flnk < 25; flnk++)

123

{

124

AlignmentI exp = AlignmentUtils.expandContext(al, flnk);

125

System.out.println("\nFlank size: " + flnk);

126

System.out.println(new AppletFormatAdapter()

127

.formatSequences(FileFormat.Clustal, exp, true));

if (flnk == -1)

{

* Full expansion to complete sequences

132

133

for (SequenceI sq : exp.getSequences())

134

{

135

String ung = sq.getSequenceAsString().replaceAll("-+", "");

136

final String errorMsg = "Flanking sequence not the same as original dataset sequence.\n"

137

+ ung + "\n"

138

+ sq.getDatasetSequence().getSequenceAsString();

139

assertTrue(errorMsg, ung.equalsIgnoreCase(

140

sq.getDatasetSequence().getSequenceAsString()));

}

}

else if (flnk == 24)

{

* Last sequence is fully expanded, others have leading gaps to match

147

148

assertTrue(exp.getSequenceAt(4).getSequenceAsString()

149

.startsWith("abc"));

150

assertTrue(exp.getSequenceAt(3).getSequenceAsString()

151

.startsWith("--abc"));

152

assertTrue(exp.getSequenceAt(2).getSequenceAsString()

153

.startsWith("----abc"));

154

assertTrue(exp.getSequenceAt(1).getSequenceAsString()

155

.startsWith("------abc"));

156

assertTrue(exp.getSequenceAt(0).getSequenceAsString()

157

.startsWith("--------abc"));

}

}

}

/**

* Test that annotations are correctly adjusted by expandContext

164

165

@Test(groups = { "Functional" })

166

public void testExpandContext_annotation()

167

{

168

AlignmentI al = new Alignment(new Sequence[] {});

169

SequenceI ds = new Sequence("Seq1", "ABCDEFGHI");

170

// subsequence DEF:

171

SequenceI seq1 = ds.deriveSequence().getSubSequence(3, 6);

172

al.addSequence(seq1);

173

174

175

* Annotate DEF with 4/5/6 respectively

176

177

Annotation[] anns = new Annotation[] { new Annotation(4),

178

new Annotation(5), new Annotation(6) };

179

AlignmentAnnotation ann = new AlignmentAnnotation("SS",

180

"secondary structure", anns);

181

seq1.addAlignmentAnnotation(ann);

182

183

184

* The annotations array should match aligned positions

185

186

assertEquals(3, ann.annotations.length);

187

assertEquals(4, ann.annotations[0].value, 0.001);

188

assertEquals(5, ann.annotations[1].value, 0.001);

189

assertEquals(6, ann.annotations[2].value, 0.001);

190

191

192

* Check annotation to sequence position mappings before expanding the

193

* sequence; these are set up in Sequence.addAlignmentAnnotation ->

194

* Annotation.setSequenceRef -> createSequenceMappings

195

196

assertNull(ann.getAnnotationForPosition(1));

197

assertNull(ann.getAnnotationForPosition(2));

198

assertNull(ann.getAnnotationForPosition(3));

199

assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);

200

assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);

201

assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);

202

assertNull(ann.getAnnotationForPosition(7));

203

assertNull(ann.getAnnotationForPosition(8));

204

assertNull(ann.getAnnotationForPosition(9));

205

206

207

* Expand the subsequence to the full sequence abcDEFghi

208

209

AlignmentI expanded = AlignmentUtils.expandContext(al, -1);

210

assertEquals("abcDEFghi",

211

expanded.getSequenceAt(0).getSequenceAsString());

212

213

214

* Confirm the alignment and sequence have the same SS annotation,

215

* referencing the expanded sequence

216

217

ann = expanded.getSequenceAt(0).getAnnotation()[0];

218

assertSame(ann, expanded.getAlignmentAnnotation()[0]);

219

assertSame(expanded.getSequenceAt(0), ann.sequenceRef);

220

221

222

* The annotations array should have null values except for annotated

223

* positions

224

225

assertNull(ann.annotations[0]);

226

assertNull(ann.annotations[1]);

227

assertNull(ann.annotations[2]);

228

assertEquals(4, ann.annotations[3].value, 0.001);

229

assertEquals(5, ann.annotations[4].value, 0.001);

230

assertEquals(6, ann.annotations[5].value, 0.001);

231

assertNull(ann.annotations[6]);

232

assertNull(ann.annotations[7]);

233

assertNull(ann.annotations[8]);

234

235

236

* sequence position mappings should be unchanged

237

238

assertNull(ann.getAnnotationForPosition(1));

239

assertNull(ann.getAnnotationForPosition(2));

240

assertNull(ann.getAnnotationForPosition(3));

241

assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);

242

assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);

243

assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);

244

assertNull(ann.getAnnotationForPosition(7));

245

assertNull(ann.getAnnotationForPosition(8));

246

assertNull(ann.getAnnotationForPosition(9));

}

/**

* Test method that returns a map of lists of sequences by sequence name.

251

252

* @throws IOException

253

254

@Test(groups = { "Functional" })

255

public void testGetSequencesByName() throws IOException

256

{

257

final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n"

258

+ ">Seq1Name\nABCD\n";

259

AlignmentI al = loadAlignment(data, FileFormat.Fasta);

260

Map<String, List<SequenceI>> map = AlignmentUtils

261

.getSequencesByName(al);

262

assertEquals(2, map.keySet().size());

263

assertEquals(2, map.get("Seq1Name").size());

264

assertEquals("KQYL", map.get("Seq1Name").get(0).getSequenceAsString());

265

assertEquals("ABCD", map.get("Seq1Name").get(1).getSequenceAsString());

266

assertEquals(1, map.get("Seq2Name").size());

267

assertEquals("RFPW", map.get("Seq2Name").get(0).getSequenceAsString());

}

/**

* Helper method to load an alignment and ensure dataset sequences are set up.

* @param data

* @param format

* TODO

* @return

* @throws IOException

278

279

protected AlignmentI loadAlignment(final String data, FileFormatI format)

280

throws IOException

281

{

282

AlignmentI a = new FormatAdapter().readFile(data, DataSourceType.PASTE,

format);

a.setDataset(null);

return a;

}

/**

* Test mapping of protein to cDNA, for the case where we have no sequence

290

* cross-references, so mappings are made first-served 1-1 where sequences

291

* translate.

292

293

* @throws IOException

294

295

@Test(groups = { "Functional" })

296

public void testMapProteinAlignmentToCdna_noXrefs() throws IOException

297

{

298

List<SequenceI> protseqs = new ArrayList<>();

299

protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));

300

protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));

301

protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));

302

AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));

303

protein.setDataset(null);

304

305

List<SequenceI> dnaseqs = new ArrayList<>();

306

dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR

307

dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAA")); // = EIQ

308

dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ

309

dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ

310

AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));

311

cdna.setDataset(null);

312

313

assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));

314

315

// 3 mappings made, each from 1 to 1 sequence

316

assertEquals(3, protein.getCodonFrames().size());

317

assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());

318

assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());

319

assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());

320

321

// V12345 mapped to A22222

322

AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))

323

.get(0);

324

assertEquals(1, acf.getdnaSeqs().length);

325

assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),

326

acf.getdnaSeqs()[0]);

327

Mapping[] protMappings = acf.getProtMappings();

328

assertEquals(1, protMappings.length);

329

MapList mapList = protMappings[0].getMap();

330

assertEquals(3, mapList.getFromRatio());

331

assertEquals(1, mapList.getToRatio());

332

assertTrue(

333

Arrays.equals(new int[]

334

{ 1, 9 }, mapList.getFromRanges().get(0)));

335

assertEquals(1, mapList.getFromRanges().size());

336

assertTrue(

337

Arrays.equals(new int[]

338

{ 1, 3 }, mapList.getToRanges().get(0)));

339

assertEquals(1, mapList.getToRanges().size());

340

341

// V12346 mapped to A33333

342

acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);

343

assertEquals(1, acf.getdnaSeqs().length);

344

assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),

345

acf.getdnaSeqs()[0]);

346

347

// V12347 mapped to A11111

348

acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);

349

assertEquals(1, acf.getdnaSeqs().length);

350

assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),

351

acf.getdnaSeqs()[0]);

352

353

// no mapping involving the 'extra' A44444

354

assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());

}

/**

* Test for the alignSequenceAs method that takes two sequences and a mapping.

359

360

@Test(groups = { "Functional" })

361

public void testAlignSequenceAs_withMapping_noIntrons()

362

{

363

MapList map = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 }, 3, 1);

364

365

366

* No existing gaps in dna:

367

368

checkAlignSequenceAs("GGGAAA", "-A-L-", false, false, map,

"---GGG---AAA");

* Now introduce gaps in dna but ignore them when realigning.

373

374

checkAlignSequenceAs("-G-G-G-A-A-A-", "-A-L-", false, false, map,

"---GGG---AAA");

* Now include gaps in dna when realigning. First retaining 'mapped' gaps

379

* only, i.e. those within the exon region.

380

381

checkAlignSequenceAs("-G-G--G-A--A-A-", "-A-L-", true, false, map,

382

"---G-G--G---A--A-A");

383

384

385

* Include all gaps in dna when realigning (within and without the exon

386

* region). The leading gap, and the gaps between codons, are subsumed by

387

* the protein alignment gap.

388

389

checkAlignSequenceAs("-G-GG--AA-A---", "-A-L-", true, true, map,

390

"---G-GG---AA-A---");

391

392

393

* Include only unmapped gaps in dna when realigning (outside the exon

394

* region). The leading gap, and the gaps between codons, are subsumed by

395

* the protein alignment gap.

396

397

checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", false, true, map,

"---GGG---AAA---");

}

/**

* Test for the alignSequenceAs method that takes two sequences and a mapping.

403

404

@Test(groups = { "Functional" })

405

public void testAlignSequenceAs_withMapping_withIntrons()

406

{

407

408

* Exons at codon 2 (AAA) and 4 (TTT)

409

410

MapList map = new MapList(new int[] { 4, 6, 10, 12 },

new int[]

{ 1, 2 }, 3, 1);

* Simple case: no gaps in dna

416

417

checkAlignSequenceAs("GGGAAACCCTTTGGG", "--A-L-", false, false, map,

418

"GGG---AAACCCTTTGGG");

419

420

421

* Add gaps to dna - but ignore when realigning.

422

423

checkAlignSequenceAs("-G-G-G--A--A---AC-CC-T-TT-GG-G-", "--A-L-", false,

424

false, map, "GGG---AAACCCTTTGGG");

425

426

427

* Add gaps to dna - include within exons only when realigning.

428

429

checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true,

430

false, map, "GGG---A--A---ACCCT-TTGGG");

431

432

433

* Include gaps outside exons only when realigning.

434

435

checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",

436

false, true, map, "-G-G-GAAAC-CCTTT-GG-G-");

437

438

439

* Include gaps following first intron if we are 'preserving mapped gaps'

440

441

checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true,

442

true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");

443

444

445

* Include all gaps in dna when realigning.

446

447

checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true,

448

true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");

}

/**

* Test for the case where not all of the protein sequence is mapped to cDNA.

453

454

@Test(groups = { "Functional" })

455

public void testAlignSequenceAs_withMapping_withUnmappedProtein()

456

{

457

458

* Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P

459

460

final MapList map = new MapList(new int[] { 4, 6, 10, 12 },

461

new int[]

462

{ 1, 1, 3, 3 }, 3, 1);

463

464

465

* -L- 'aligns' ccc------

466

467

checkAlignSequenceAs("gggAAAcccTTTggg", "-A-L-P-", false, false, map,

468

"gggAAAccc------TTTggg");

}

/**

* Helper method that performs and verifies the method under test.

473

474

* @param alignee

475

* the sequence to be realigned

476

* @param alignModel

477

* the sequence whose alignment is to be copied

478

* @param preserveMappedGaps

479

* @param preserveUnmappedGaps

* @param map

* @param expected

protected void checkAlignSequenceAs(final String alignee,

484

final String alignModel, final boolean preserveMappedGaps,

485

final boolean preserveUnmappedGaps, MapList map,

486

final String expected)

487

{

488

SequenceI alignMe = new Sequence("Seq1", alignee);

489

alignMe.createDatasetSequence();

490

SequenceI alignFrom = new Sequence("Seq2", alignModel);

491

alignFrom.createDatasetSequence();

492

AlignedCodonFrame acf = new AlignedCodonFrame();

493

acf.addMap(alignMe.getDatasetSequence(), alignFrom.getDatasetSequence(),

494

map);

495

496

AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "---", '-',

497

preserveMappedGaps, preserveUnmappedGaps);

498

assertEquals(expected, alignMe.getSequenceAsString());

}

/**

* Test for the alignSequenceAs method where we preserve gaps in introns only.

503

504

@Test(groups = { "Functional" })

505

public void testAlignSequenceAs_keepIntronGapsOnly()

{

* Intron GGGAAA followed by exon CCCTTT

510

511

MapList map = new MapList(new int[] { 7, 12 }, new int[] { 1, 2 }, 3,

512

1);

513

514

checkAlignSequenceAs("GG-G-AA-A-C-CC-T-TT", "AL", false, true, map,

"GG-G-AA-ACCCTTT");

}

/**

* Test the method that realigns protein to match mapped codon alignment.

520

521

@Test(groups = { "Functional" })

522

public void testAlignProteinAsDna()

523

{

524

// seq1 codons are [1,2,3] [4,5,6] [7,8,9] [10,11,12]

525

SequenceI dna1 = new Sequence("Seq1", "TGCCATTACCAG-");

526

// seq2 codons are [1,3,4] [5,6,7] [8,9,10] [11,12,13]

527

SequenceI dna2 = new Sequence("Seq2", "T-GCCATTACCAG");

528

// seq3 codons are [1,2,3] [4,5,7] [8,9,10] [11,12,13]

529

SequenceI dna3 = new Sequence("Seq3", "TGCCA-TTACCAG");

530

AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });

531

dna.setDataset(null);

532

533

// protein alignment will be realigned like dna

534

SequenceI prot1 = new Sequence("Seq1", "CHYQ");

535

SequenceI prot2 = new Sequence("Seq2", "CHYQ");

536

SequenceI prot3 = new Sequence("Seq3", "CHYQ");

537

SequenceI prot4 = new Sequence("Seq4", "R-QSV"); // unmapped, unchanged

538

AlignmentI protein = new Alignment(

539

new SequenceI[]

540

{ prot1, prot2, prot3, prot4 });

541

protein.setDataset(null);

542

543

MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3,

544

1);

545

AlignedCodonFrame acf = new AlignedCodonFrame();

546

acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);

547

acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);

548

acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);

549

ArrayList<AlignedCodonFrame> acfs = new ArrayList<>();

550

acfs.add(acf);

551

protein.setCodonFrames(acfs);

552

553

554

* Translated codon order is [1,2,3] [1,3,4] [4,5,6] [4,5,7] [5,6,7] [7,8,9]

555

* [8,9,10] [10,11,12] [11,12,13]

556

557

AlignmentUtils.alignProteinAsDna(protein, dna);

558

assertEquals("C-H--Y-Q-", prot1.getSequenceAsString());

559

assertEquals("-C--H-Y-Q", prot2.getSequenceAsString());

560

assertEquals("C--H--Y-Q", prot3.getSequenceAsString());

561

assertEquals("R-QSV", prot4.getSequenceAsString());

}

/**

* Test the method that tests whether a CDNA sequence translates to a protein

566

* sequence

567

568

@Test(groups = { "Functional" })

569

public void testTranslatesAs()

570

{

571

// null arguments check

572

assertFalse(AlignmentUtils.translatesAs(null, 0, null));

573

assertFalse(AlignmentUtils.translatesAs(new char[] { 't' }, 0, null));

574

assertFalse(AlignmentUtils.translatesAs(null, 0, new char[] { 'a' }));

575

576

// straight translation

577

assertTrue(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,

578

"FPKG".toCharArray()));

579

// with extra start codon (not in protein)

580

assertTrue(AlignmentUtils.translatesAs("atgtttcccaaaggg".toCharArray(),

581

3, "FPKG".toCharArray()));

582

// with stop codon1 (not in protein)

583

assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),

584

0, "FPKG".toCharArray()));

585

// with stop codon1 (in protein as *)

586

assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),

587

0, "FPKG*".toCharArray()));

588

// with stop codon2 (not in protein)

589

assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtag".toCharArray(),

590

0, "FPKG".toCharArray()));

591

// with stop codon3 (not in protein)

592

assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtga".toCharArray(),

593

0, "FPKG".toCharArray()));

594

// with start and stop codon1

595

assertTrue(AlignmentUtils.translatesAs(

596

"atgtttcccaaagggtaa".toCharArray(), 3, "FPKG".toCharArray()));

597

// with start and stop codon1 (in protein as *)

598

assertTrue(AlignmentUtils.translatesAs(

599

"atgtttcccaaagggtaa".toCharArray(), 3, "FPKG*".toCharArray()));

600

// with start and stop codon2

601

assertTrue(AlignmentUtils.translatesAs(

602

"atgtttcccaaagggtag".toCharArray(), 3, "FPKG".toCharArray()));

603

// with start and stop codon3

604

assertTrue(AlignmentUtils.translatesAs(

605

"atgtttcccaaagggtga".toCharArray(), 3, "FPKG".toCharArray()));

606

607

// with embedded stop codons

608

assertTrue(AlignmentUtils.translatesAs(

609

"atgtttTAGcccaaaTAAgggtga".toCharArray(), 3,

610

"F*PK*G".toCharArray()));

611

612

// wrong protein

613

assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,

614

"FPMG".toCharArray()));

615

616

// truncated dna

617

assertFalse(AlignmentUtils.translatesAs("tttcccaaagg".toCharArray(), 0,

618

"FPKG".toCharArray()));

619

620

// truncated protein

621

assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,

622

"FPK".toCharArray()));

623

624

// overlong dna (doesn't end in stop codon)

625

assertFalse(AlignmentUtils.translatesAs("tttcccaaagggttt".toCharArray(),

626

0, "FPKG".toCharArray()));

627

628

// dna + stop codon + more

629

assertFalse(AlignmentUtils.translatesAs(

630

"tttcccaaagggttaga".toCharArray(), 0, "FPKG".toCharArray()));

631

632

// overlong protein

633

assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,

634

"FPKGQ".toCharArray()));

}

/**

* Test mapping of protein to cDNA, for cases where the cDNA has start and/or

639

* stop codons in addition to the protein coding sequence.

640

641

* @throws IOException

642

643

@Test(groups = { "Functional" })

644

public void testMapProteinAlignmentToCdna_withStartAndStopCodons()

645

throws IOException

646

{

647

List<SequenceI> protseqs = new ArrayList<>();

648

protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));

649

protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));

650

protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));

651

AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));

652

protein.setDataset(null);

653

654

List<SequenceI> dnaseqs = new ArrayList<>();

655

// start + SAR:

656

dnaseqs.add(new Sequence("EMBL|A11111", "ATGTCAGCACGC"));

657

// = EIQ + stop

658

dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAATAA"));

659

// = start +EIQ + stop

660

dnaseqs.add(new Sequence("EMBL|A33333", "ATGGAAATCCAGTAG"));

661

dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG"));

662

AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));

663

cdna.setDataset(null);

664

665

assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));

666

667

// 3 mappings made, each from 1 to 1 sequence

668

assertEquals(3, protein.getCodonFrames().size());

669

assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());

670

assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());

671

assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());

672

673

// V12345 mapped from A22222

674

AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))

675

.get(0);

676

assertEquals(1, acf.getdnaSeqs().length);

677

assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),

678

acf.getdnaSeqs()[0]);

679

Mapping[] protMappings = acf.getProtMappings();

680

assertEquals(1, protMappings.length);

681

MapList mapList = protMappings[0].getMap();

682

assertEquals(3, mapList.getFromRatio());

683

assertEquals(1, mapList.getToRatio());

684

assertTrue(

685

Arrays.equals(new int[]

686

{ 1, 9 }, mapList.getFromRanges().get(0)));

687

assertEquals(1, mapList.getFromRanges().size());

688

assertTrue(

689

Arrays.equals(new int[]

690

{ 1, 3 }, mapList.getToRanges().get(0)));

691

assertEquals(1, mapList.getToRanges().size());

692

693

// V12346 mapped from A33333 starting position 4

694

acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);

695

assertEquals(1, acf.getdnaSeqs().length);

696

assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),

697

acf.getdnaSeqs()[0]);

698

protMappings = acf.getProtMappings();

699

assertEquals(1, protMappings.length);

700

mapList = protMappings[0].getMap();

701

assertEquals(3, mapList.getFromRatio());

702

assertEquals(1, mapList.getToRatio());

703

assertTrue(

704

Arrays.equals(new int[]

705

{ 4, 12 }, mapList.getFromRanges().get(0)));

706

assertEquals(1, mapList.getFromRanges().size());

707

assertTrue(

708

Arrays.equals(new int[]

709

{ 1, 3 }, mapList.getToRanges().get(0)));

710

assertEquals(1, mapList.getToRanges().size());

711

712

// V12347 mapped to A11111 starting position 4

713

acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);

714

assertEquals(1, acf.getdnaSeqs().length);

715

assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),

716

acf.getdnaSeqs()[0]);

717

protMappings = acf.getProtMappings();

718

assertEquals(1, protMappings.length);

719

mapList = protMappings[0].getMap();

720

assertEquals(3, mapList.getFromRatio());

721

assertEquals(1, mapList.getToRatio());

722

assertTrue(

723

Arrays.equals(new int[]

724

{ 4, 12 }, mapList.getFromRanges().get(0)));

725

assertEquals(1, mapList.getFromRanges().size());

726

assertTrue(

727

Arrays.equals(new int[]

728

{ 1, 3 }, mapList.getToRanges().get(0)));

729

assertEquals(1, mapList.getToRanges().size());

730

731

// no mapping involving the 'extra' A44444

732

assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());

}

/**

* Test mapping of protein to cDNA, for the case where we have some sequence

737

* cross-references. Verify that 1-to-many mappings are made where

738

* cross-references exist and sequences are mappable.

739

740

* @throws IOException

741

742

@Test(groups = { "Functional" })

743

public void testMapProteinAlignmentToCdna_withXrefs() throws IOException

744

{

745

List<SequenceI> protseqs = new ArrayList<>();

746

protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));

747

protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));

748

protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));

749

AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));

750

protein.setDataset(null);

751

752

List<SequenceI> dnaseqs = new ArrayList<>();

753

dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR

754

dnaseqs.add(new Sequence("EMBL|A22222", "ATGGAGATACAA")); // = start + EIQ

755

dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ

756

dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ

757

dnaseqs.add(new Sequence("EMBL|A55555", "GAGATTCAG")); // = EIQ

758

AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[5]));

759

cdna.setDataset(null);

760

761

// Xref A22222 to V12345 (should get mapped)

762

dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));

763

// Xref V12345 to A44444 (should get mapped)

764

protseqs.get(0).addDBRef(new DBRefEntry("EMBL", "1", "A44444"));

765

// Xref A33333 to V12347 (sequence mismatch - should not get mapped)

766

dnaseqs.get(2).addDBRef(new DBRefEntry("UNIPROT", "1", "V12347"));

767

// as V12345 is mapped to A22222 and A44444, this leaves V12346 unmapped.

768

// it should get paired up with the unmapped A33333

769

// A11111 should be mapped to V12347

770

// A55555 is spare and has no xref so is not mapped

771

772

assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));

773

774

// 4 protein mappings made for 3 proteins, 2 to V12345, 1 each to V12346/7

775

assertEquals(3, protein.getCodonFrames().size());

776

assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());

777

assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());

778

assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());

779

780

// one mapping for each of the first 4 cDNA sequences

781

assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());

782

assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());

783

assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(2)).size());

784

assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(3)).size());

785

786

// V12345 mapped to A22222 and A44444

787

AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))

788

.get(0);

789

assertEquals(2, acf.getdnaSeqs().length);

790

assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),

791

acf.getdnaSeqs()[0]);

792

assertEquals(cdna.getSequenceAt(3).getDatasetSequence(),

793

acf.getdnaSeqs()[1]);

794

795

// V12346 mapped to A33333

796

acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);

797

assertEquals(1, acf.getdnaSeqs().length);

798

assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),

799

acf.getdnaSeqs()[0]);

800

801

// V12347 mapped to A11111

802

acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);

803

assertEquals(1, acf.getdnaSeqs().length);

804

assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),

805

acf.getdnaSeqs()[0]);

806

807

// no mapping involving the 'extra' A55555

808

assertTrue(protein.getCodonFrame(cdna.getSequenceAt(4)).isEmpty());

}

/**

* Test mapping of protein to cDNA, for the case where we have some sequence

813

* cross-references. Verify that once we have made an xref mapping we don't

814

* also map un-xrefd sequeces.

815

816

* @throws IOException

817

818

@Test(groups = { "Functional" })

819

public void testMapProteinAlignmentToCdna_prioritiseXrefs()

820

throws IOException

821

{

822

List<SequenceI> protseqs = new ArrayList<>();

823

protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));

824

protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));

825

AlignmentI protein = new Alignment(

826

protseqs.toArray(new SequenceI[protseqs.size()]));

827

protein.setDataset(null);

828

829

List<SequenceI> dnaseqs = new ArrayList<>();

830

dnaseqs.add(new Sequence("EMBL|A11111", "GAAATCCAG")); // = EIQ

831

dnaseqs.add(new Sequence("EMBL|A22222", "GAAATTCAG")); // = EIQ

832

AlignmentI cdna = new Alignment(

833

dnaseqs.toArray(new SequenceI[dnaseqs.size()]));

834

cdna.setDataset(null);

835

836

// Xref A22222 to V12345 (should get mapped)

837

// A11111 should then be mapped to the unmapped V12346

838

dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));

839

840

assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));

841

842

// 2 protein mappings made

843

assertEquals(2, protein.getCodonFrames().size());

844

assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());

845

assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());

846

847

// one mapping for each of the cDNA sequences

848

assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());

849

assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());

850

851

// V12345 mapped to A22222

852

AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))

853

.get(0);

854

assertEquals(1, acf.getdnaSeqs().length);

855

assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),

856

acf.getdnaSeqs()[0]);

857

858

// V12346 mapped to A11111

859

acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);

860

assertEquals(1, acf.getdnaSeqs().length);

861

assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),

862

acf.getdnaSeqs()[0]);

}

/**

* Test the method that shows or hides sequence annotations by type(s) and

867

* selection group.

868

869

@Test(groups = { "Functional" })

870

public void testShowOrHideSequenceAnnotations()

871

{

872

SequenceI seq1 = new Sequence("Seq1", "AAA");

873

SequenceI seq2 = new Sequence("Seq2", "BBB");

874

SequenceI seq3 = new Sequence("Seq3", "CCC");

875

Annotation[] anns = new Annotation[] { new Annotation(2f) };

876

AlignmentAnnotation ann1 = new AlignmentAnnotation("Structure", "ann1",

877

anns);

878

ann1.setSequenceRef(seq1);

879

AlignmentAnnotation ann2 = new AlignmentAnnotation("Structure", "ann2",

880

anns);

881

ann2.setSequenceRef(seq2);

882

AlignmentAnnotation ann3 = new AlignmentAnnotation("Structure", "ann3",

883

anns);

884

AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "ann4",

885

anns);

886

ann4.setSequenceRef(seq1);

887

AlignmentAnnotation ann5 = new AlignmentAnnotation("Temp", "ann5",

888

anns);

889

ann5.setSequenceRef(seq2);

890

AlignmentAnnotation ann6 = new AlignmentAnnotation("Temp", "ann6",

891

anns);

892

AlignmentI al = new Alignment(new SequenceI[] { seq1, seq2, seq3 });

893

al.addAnnotation(ann1); // Structure for Seq1

894

al.addAnnotation(ann2); // Structure for Seq2

895

al.addAnnotation(ann3); // Structure for no sequence

896

al.addAnnotation(ann4); // Temp for seq1

897

al.addAnnotation(ann5); // Temp for seq2

898

al.addAnnotation(ann6); // Temp for no sequence

899

List<String> types = new ArrayList<>();

900

List<SequenceI> scope = new ArrayList<>();

901

902

903

* Set all sequence related Structure to hidden (ann1, ann2)

904

905

types.add("Structure");

906

AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,

907

false);

908

assertFalse(ann1.visible);

909

assertFalse(ann2.visible);

910

assertTrue(ann3.visible); // not sequence-related, not affected

911

assertTrue(ann4.visible); // not Structure, not affected

912

assertTrue(ann5.visible); // "

913

assertTrue(ann6.visible); // not sequence-related, not affected

914

915

916

* Set Temp in {seq1, seq3} to hidden

types.clear();

types.add("Temp");

scope.add(seq1);

scope.add(seq3);

AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, false,

923

false);

924

assertFalse(ann1.visible); // unchanged

925

assertFalse(ann2.visible); // unchanged

926

assertTrue(ann3.visible); // not sequence-related, not affected

927

assertFalse(ann4.visible); // Temp for seq1 hidden

928

assertTrue(ann5.visible); // not in scope, not affected

929

assertTrue(ann6.visible); // not sequence-related, not affected

930

931

932

* Set Temp in all sequences to hidden

types.clear();

types.add("Temp");

scope.add(seq1);

scope.add(seq3);

AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,

939

false);

940

assertFalse(ann1.visible); // unchanged

941

assertFalse(ann2.visible); // unchanged

942

assertTrue(ann3.visible); // not sequence-related, not affected

943

assertFalse(ann4.visible); // Temp for seq1 hidden

944

assertFalse(ann5.visible); // Temp for seq2 hidden

945

assertTrue(ann6.visible); // not sequence-related, not affected

946

947

948

* Set all types in {seq1, seq3} to visible

types.clear();

scope.clear();

scope.add(seq1);

scope.add(seq3);

AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, true,

955

true);

956

assertTrue(ann1.visible); // Structure for seq1 set visible

957

assertFalse(ann2.visible); // not in scope, unchanged

958

assertTrue(ann3.visible); // not sequence-related, not affected

959

assertTrue(ann4.visible); // Temp for seq1 set visible

960

assertFalse(ann5.visible); // not in scope, unchanged

961

assertTrue(ann6.visible); // not sequence-related, not affected

962

963

964

* Set all types in all scope to hidden

965

966

AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, true,

967

false);

968

assertFalse(ann1.visible);

969

assertFalse(ann2.visible);

970

assertTrue(ann3.visible); // not sequence-related, not affected

971

assertFalse(ann4.visible);

972

assertFalse(ann5.visible);

973

assertTrue(ann6.visible); // not sequence-related, not affected

}

/**

* Tests for the method that checks if one sequence cross-references another

978

979

@Test(groups = { "Functional" })

980

public void testHasCrossRef()

981

{

982

assertFalse(AlignmentUtils.hasCrossRef(null, null));

983

SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");

984

assertFalse(AlignmentUtils.hasCrossRef(seq1, null));

985

assertFalse(AlignmentUtils.hasCrossRef(null, seq1));

986

SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");

987

assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));

988

989

// different ref

990

seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20193"));

991

assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));

992

993

// case-insensitive; version number is ignored

994

seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20192"));

995

assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));

996

997

// right case!

998

seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));

999

assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));

1000

// test is one-way only

1001

assertFalse(AlignmentUtils.hasCrossRef(seq2, seq1));

}

/**

* Tests for the method that checks if either sequence cross-references the

1006

* other

1007

1008

@Test(groups = { "Functional" })

1009

public void testHaveCrossRef()

1010

{

1011

assertFalse(AlignmentUtils.hasCrossRef(null, null));

1012

SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");

1013

assertFalse(AlignmentUtils.haveCrossRef(seq1, null));

1014

assertFalse(AlignmentUtils.haveCrossRef(null, seq1));

1015

SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");

1016

assertFalse(AlignmentUtils.haveCrossRef(seq1, seq2));

1017

1018

seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));

1019

assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));

1020

// next is true for haveCrossRef, false for hasCrossRef

1021

assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));

1022

1023

// now the other way round

1024

seq1.setDBRefs(null);

1025

seq2.addDBRef(new DBRefEntry("EMBL", "1", "A12345"));

1026

assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));

1027

assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));

1028

1029

// now both ways

1030

seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));

1031

assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));

1032

assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));

}

/**

* Test the method that extracts the cds-only part of a dna alignment.

1037

1038

@Test(groups = { "Functional" })

1039

public void testMakeCdsAlignment()

{

* scenario:

* dna1 --> [4, 6] [10,12] --> pep1

1044

* dna2 --> [1, 3] [7, 9] [13,15] --> pep2

1045

1046

SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");

1047

SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");

1048

SequenceI pep1 = new Sequence("pep1", "GF");

1049

SequenceI pep2 = new Sequence("pep2", "GFP");

1050

pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "pep1"));

1051

pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "pep2"));

1052

dna1.createDatasetSequence();

1053

dna2.createDatasetSequence();

1054

pep1.createDatasetSequence();

1055

pep2.createDatasetSequence();

1056

AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });

1057

dna.setDataset(null);

1058

1059

1060

* put a variant feature on dna2 base 8

1061

* - should transfer to cds2 base 5

1062

1063

dna2.addSequenceFeature(

1064

new SequenceFeature("variant", "hgmd", 8, 8, 0f, null));

1065

1066

1067

* need a sourceDbRef if we are to construct dbrefs to the CDS

1068

* sequence from the dna contig sequences

1069

1070

DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");

1071

dna1.getDatasetSequence().addDBRef(dbref);

1072

org.testng.Assert.assertEquals(dbref, dna1.getPrimaryDBRefs().get(0));

1073

dbref = new DBRefEntry("ENSEMBL", "0", "dna2");

1074

dna2.getDatasetSequence().addDBRef(dbref);

1075

org.testng.Assert.assertEquals(dbref, dna2.getPrimaryDBRefs().get(0));

1076

1077

1078

* CDS sequences are 'discovered' from dna-to-protein mappings on the alignment

1079

* dataset (e.g. added from dbrefs by CrossRef.findXrefSequences)

1080

1081

MapList mapfordna1 = new MapList(new int[] { 4, 6, 10, 12 },

1082

new int[]

1083

{ 1, 2 }, 3, 1);

1084

AlignedCodonFrame acf = new AlignedCodonFrame();

1085

acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),

1086

mapfordna1);

1087

dna.addCodonFrame(acf);

1088

MapList mapfordna2 = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },

1089

new int[]

1090

{ 1, 3 }, 3, 1);

1091

acf = new AlignedCodonFrame();

1092

acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(),

1093

mapfordna2);

1094

dna.addCodonFrame(acf);

1095

1096

1097

* In this case, mappings originally came from matching Uniprot accessions

1098

* - so need an xref on dna involving those regions.

1099

* These are normally constructed from CDS annotation

1100

1101

DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1",

1102

new Mapping(mapfordna1));

1103

dna1.addDBRef(dna1xref);

1104

assertEquals(2, dna1.getDBRefs().size()); // to self and to pep1

1105

DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2",

1106

new Mapping(mapfordna2));

1107

dna2.addDBRef(dna2xref);

1108

assertEquals(2, dna2.getDBRefs().size()); // to self and to pep2

1109

1110

1111

* execute method under test:

1112

1113

AlignmentI cds = AlignmentUtils

1114

.makeCdsAlignment(new SequenceI[]

1115

{ dna1, dna2 }, dna.getDataset(), null);

1116

1117

1118

* verify cds sequences

1119

1120

assertEquals(2, cds.getSequences().size());

1121

assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());

1122

assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());

1123

1124

1125

* verify shared, extended alignment dataset

1126

1127

assertSame(dna.getDataset(), cds.getDataset());

1128

SequenceI cds1Dss = cds.getSequenceAt(0).getDatasetSequence();

1129

SequenceI cds2Dss = cds.getSequenceAt(1).getDatasetSequence();

1130

assertTrue(dna.getDataset().getSequences().contains(cds1Dss));

1131

assertTrue(dna.getDataset().getSequences().contains(cds2Dss));

1132

1133

1134

* verify CDS has a dbref with mapping to peptide

1135

1136

assertNotNull(cds1Dss.getDBRefs());

1137

assertEquals(2, cds1Dss.getDBRefs().size());

1138

dbref = cds1Dss.getDBRefs().get(0);

1139

assertEquals(dna1xref.getSource(), dbref.getSource());

1140

// version is via ensembl's primary ref

1141

assertEquals(dna1xref.getVersion(), dbref.getVersion());

1142

assertEquals(dna1xref.getAccessionId(), dbref.getAccessionId());

1143

assertNotNull(dbref.getMap());

1144

assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo());

1145

MapList cdsMapping = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 },

1146

3, 1);

1147

assertEquals(cdsMapping, dbref.getMap().getMap());

1148

1149

1150

* verify peptide has added a dbref with reverse mapping to CDS

1151

1152

assertNotNull(pep1.getDBRefs());

1153

// FIXME pep1.getDBRefs() is 1 - is that the correct behaviour ?

1154

assertEquals(2, pep1.getDBRefs().size());

1155

dbref = pep1.getDBRefs().get(1);

1156

assertEquals("ENSEMBL", dbref.getSource());

1157

assertEquals("0", dbref.getVersion());

1158

assertEquals("CDS|dna1", dbref.getAccessionId());

1159

assertNotNull(dbref.getMap());

1160

assertSame(cds1Dss, dbref.getMap().getTo());

1161

assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap());

1162

1163

1164

* verify cDNA has added a dbref with mapping to CDS

1165

1166

assertEquals(3, dna1.getDBRefs().size());

1167

DBRefEntry dbRefEntry = dna1.getDBRefs().get(2);

1168

assertSame(cds1Dss, dbRefEntry.getMap().getTo());

1169

MapList dnaToCdsMapping = new MapList(new int[] { 4, 6, 10, 12 },

1170

new int[]

1171

{ 1, 6 }, 1, 1);

1172

assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());

1173

assertEquals(3, dna2.getDBRefs().size());

1174

dbRefEntry = dna2.getDBRefs().get(2);

1175

assertSame(cds2Dss, dbRefEntry.getMap().getTo());

1176

dnaToCdsMapping = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },

1177

new int[]

1178

{ 1, 9 }, 1, 1);

1179

assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());

1180

1181

1182

* verify CDS has added a dbref with mapping to cDNA

1183

1184

assertEquals(2, cds1Dss.getDBRefs().size());

1185

dbRefEntry = cds1Dss.getDBRefs().get(1);

1186

assertSame(dna1.getDatasetSequence(), dbRefEntry.getMap().getTo());

1187

MapList cdsToDnaMapping = new MapList(new int[] { 1, 6 },

1188

new int[]

1189

{ 4, 6, 10, 12 }, 1, 1);

1190

assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());

1191

assertEquals(2, cds2Dss.getDBRefs().size());

1192

dbRefEntry = cds2Dss.getDBRefs().get(1);

1193

assertSame(dna2.getDatasetSequence(), dbRefEntry.getMap().getTo());

1194

cdsToDnaMapping = new MapList(new int[] { 1, 9 },

1195

new int[]

1196

{ 1, 3, 7, 9, 13, 15 }, 1, 1);

1197

assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());

1198

1199

1200

* Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide

1201

* the mappings are on the shared alignment dataset

1202

* 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)

1203

1204

List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();

1205

assertEquals(6, cdsMappings.size());

1206

1207

1208

* verify that mapping sets for dna and cds alignments are different

1209

* [not current behaviour - all mappings are on the alignment dataset]

1210

1211

// select -> subselect type to test.

1212

// Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());

1213

// assertEquals(4, dna.getCodonFrames().size());

1214

// assertEquals(4, cds.getCodonFrames().size());

1215

1216

1217

* Two mappings involve pep1 (dna to pep1, cds to pep1)

1218

* Mapping from pep1 to GGGTTT in first new exon sequence

1219

1220

List<AlignedCodonFrame> pep1Mappings = MappingUtils

1221

.findMappingsForSequence(pep1, cdsMappings);

1222

assertEquals(2, pep1Mappings.size());

1223

List<AlignedCodonFrame> mappings = MappingUtils

1224

.findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);

1225

assertEquals(1, mappings.size());

1226

1227

// map G to GGG

1228

SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1, mappings);

1229

assertEquals(1, sr.getResults().size());

1230

SearchResultMatchI m = sr.getResults().get(0);

1231

assertSame(cds1Dss, m.getSequence());

1232

assertEquals(1, m.getStart());

1233

assertEquals(3, m.getEnd());

1234

// map F to TTT

1235

sr = MappingUtils.buildSearchResults(pep1, 2, mappings);

1236

m = sr.getResults().get(0);

1237

assertSame(cds1Dss, m.getSequence());

1238

assertEquals(4, m.getStart());

1239

assertEquals(6, m.getEnd());

1240

1241

1242

* Two mappings involve pep2 (dna to pep2, cds to pep2)

1243

* Verify mapping from pep2 to GGGTTTCCC in second new exon sequence

1244

1245

List<AlignedCodonFrame> pep2Mappings = MappingUtils

1246

.findMappingsForSequence(pep2, cdsMappings);

1247

assertEquals(2, pep2Mappings.size());

1248

mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),

1249

pep2Mappings);

1250

assertEquals(1, mappings.size());

1251

// map G to GGG

1252

sr = MappingUtils.buildSearchResults(pep2, 1, mappings);

1253

assertEquals(1, sr.getResults().size());

1254

m = sr.getResults().get(0);

1255

assertSame(cds2Dss, m.getSequence());

1256

assertEquals(1, m.getStart());

1257

assertEquals(3, m.getEnd());

1258

// map F to TTT

1259

sr = MappingUtils.buildSearchResults(pep2, 2, mappings);

1260

m = sr.getResults().get(0);

1261

assertSame(cds2Dss, m.getSequence());

1262

assertEquals(4, m.getStart());

1263

assertEquals(6, m.getEnd());

1264

// map P to CCC

1265

sr = MappingUtils.buildSearchResults(pep2, 3, mappings);

1266

m = sr.getResults().get(0);

1267

assertSame(cds2Dss, m.getSequence());

1268

assertEquals(7, m.getStart());

1269

assertEquals(9, m.getEnd());

1270

1271

1272

* check cds2 acquired a variant feature in position 5

1273

1274

List<SequenceFeature> sfs = cds2Dss.getSequenceFeatures();

1275

assertNotNull(sfs);

1276

assertEquals(1, sfs.size());

1277

assertEquals("variant", sfs.get(0).type);

1278

assertEquals(5, sfs.get(0).begin);

1279

assertEquals(5, sfs.get(0).end);

}

/**

* Test the method that makes a cds-only alignment from a DNA sequence and its

1284

* product mappings, for the case where there are multiple exon mappings to

1285

* different protein products.

1286

1287

@Test(groups = { "Functional" })

1288

public void testMakeCdsAlignment_multipleProteins()

1289

{

1290

SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");

1291

SequenceI pep1 = new Sequence("pep1", "GF"); // GGGTTT

1292

SequenceI pep2 = new Sequence("pep2", "KP"); // aaaccc

1293

SequenceI pep3 = new Sequence("pep3", "KF"); // aaaTTT

1294

dna1.createDatasetSequence();

1295

pep1.createDatasetSequence();

1296

pep2.createDatasetSequence();

1297

pep3.createDatasetSequence();

1298

pep1.getDatasetSequence()

1299

.addDBRef(new DBRefEntry("EMBLCDS", "2", "A12345"));

1300

pep2.getDatasetSequence()

1301

.addDBRef(new DBRefEntry("EMBLCDS", "3", "A12346"));

1302

pep3.getDatasetSequence()

1303

.addDBRef(new DBRefEntry("EMBLCDS", "4", "A12347"));

1304

1305

1306

* Create the CDS alignment

1307

1308

AlignmentI dna = new Alignment(new SequenceI[] { dna1 });

1309

dna.setDataset(null);

1310

1311

1312

* Make the mappings from dna to protein

1313

1314

// map ...GGG...TTT to GF

1315

MapList map = new MapList(new int[] { 4, 6, 10, 12 },

1316

new int[]

1317

{ 1, 2 }, 3, 1);

1318

AlignedCodonFrame acf = new AlignedCodonFrame();

1319

acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);

1320

dna.addCodonFrame(acf);

1321

1322

// map aaa...ccc to KP

1323

map = new MapList(new int[] { 1, 3, 7, 9 }, new int[] { 1, 2 }, 3, 1);

1324

acf = new AlignedCodonFrame();

1325

acf.addMap(dna1.getDatasetSequence(), pep2.getDatasetSequence(), map);

1326

dna.addCodonFrame(acf);

1327

1328

// map aaa......TTT to KF

1329

map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 2 }, 3, 1);

1330

acf = new AlignedCodonFrame();

1331

acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);

1332

dna.addCodonFrame(acf);

1333

1334

1335

* execute method under test

1336

1337

AlignmentI cdsal = AlignmentUtils

1338

.makeCdsAlignment(new SequenceI[]

1339

{ dna1 }, dna.getDataset(), null);

1340

1341

1342

* Verify we have 3 cds sequences, mapped to pep1/2/3 respectively

1343

1344

List<SequenceI> cds = cdsal.getSequences();

1345

assertEquals(3, cds.size());

1346

1347

1348

* verify shared, extended alignment dataset

1349

1350

assertSame(cdsal.getDataset(), dna.getDataset());

1351

assertTrue(dna.getDataset().getSequences()

1352

.contains(cds.get(0).getDatasetSequence()));

1353

assertTrue(dna.getDataset().getSequences()

1354

.contains(cds.get(1).getDatasetSequence()));

1355

assertTrue(dna.getDataset().getSequences()

1356

.contains(cds.get(2).getDatasetSequence()));

1357

1358

1359

* verify aligned cds sequences and their xrefs

1360

1361

SequenceI cdsSeq = cds.get(0);

1362

assertEquals("GGGTTT", cdsSeq.getSequenceAsString());

1363

// assertEquals("dna1|A12345", cdsSeq.getName());

1364

assertEquals("CDS|dna1", cdsSeq.getName());

1365

// assertEquals(1, cdsSeq.getDBRefs().length);

1366

// DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];

1367

// assertEquals("EMBLCDS", cdsRef.getSource());

1368

// assertEquals("2", cdsRef.getVersion());

1369

// assertEquals("A12345", cdsRef.getAccessionId());

1370

1371

cdsSeq = cds.get(1);

1372

assertEquals("aaaccc", cdsSeq.getSequenceAsString());

1373

// assertEquals("dna1|A12346", cdsSeq.getName());

1374

assertEquals("CDS|dna1", cdsSeq.getName());

1375

// assertEquals(1, cdsSeq.getDBRefs().length);

1376

// cdsRef = cdsSeq.getDBRefs()[0];

1377

// assertEquals("EMBLCDS", cdsRef.getSource());

1378

// assertEquals("3", cdsRef.getVersion());

1379

// assertEquals("A12346", cdsRef.getAccessionId());

1380

1381

cdsSeq = cds.get(2);

1382

assertEquals("aaaTTT", cdsSeq.getSequenceAsString());

1383

// assertEquals("dna1|A12347", cdsSeq.getName());

1384

assertEquals("CDS|dna1", cdsSeq.getName());

1385

// assertEquals(1, cdsSeq.getDBRefs().length);

1386

// cdsRef = cdsSeq.getDBRefs()[0];

1387

// assertEquals("EMBLCDS", cdsRef.getSource());

1388

// assertEquals("4", cdsRef.getVersion());

1389

// assertEquals("A12347", cdsRef.getAccessionId());

1390

1391

1392

* Verify there are mappings from each cds sequence to its protein product

1393

* and also to its dna source

1394

1395

List<AlignedCodonFrame> newMappings = cdsal.getCodonFrames();

1396

1397

1398

* 6 mappings involve dna1 (to pep1/2/3, cds1/2/3)

1399

1400

List<AlignedCodonFrame> dnaMappings = MappingUtils

1401

.findMappingsForSequence(dna1, newMappings);

1402

assertEquals(6, dnaMappings.size());

* dna1 to pep1

List<AlignedCodonFrame> mappings = MappingUtils

1408

.findMappingsForSequence(pep1, dnaMappings);

1409

assertEquals(1, mappings.size());

1410

assertEquals(1, mappings.get(0).getMappings().size());

1411

assertSame(pep1.getDatasetSequence(),

1412

mappings.get(0).getMappings().get(0).getMapping().getTo());

* dna1 to cds1

List<AlignedCodonFrame> dnaToCds1Mappings = MappingUtils

1418

.findMappingsForSequence(cds.get(0), dnaMappings);

1419

Mapping mapping = dnaToCds1Mappings.get(0).getMappings().get(0)

1420

.getMapping();

1421

assertSame(cds.get(0).getDatasetSequence(), mapping.getTo());

1422

assertEquals("G(1) in CDS should map to G(4) in DNA", 4,

1423

mapping.getMap().getToPosition(1));

* dna1 to pep2

mappings = MappingUtils.findMappingsForSequence(pep2, dnaMappings);

1429

assertEquals(1, mappings.size());

1430

assertEquals(1, mappings.get(0).getMappings().size());

1431

assertSame(pep2.getDatasetSequence(),

1432

mappings.get(0).getMappings().get(0).getMapping().getTo());

* dna1 to cds2

List<AlignedCodonFrame> dnaToCds2Mappings = MappingUtils

1438

.findMappingsForSequence(cds.get(1), dnaMappings);

1439

mapping = dnaToCds2Mappings.get(0).getMappings().get(0).getMapping();

1440

assertSame(cds.get(1).getDatasetSequence(), mapping.getTo());

1441

assertEquals("c(4) in CDS should map to c(7) in DNA", 7,

1442

mapping.getMap().getToPosition(4));

* dna1 to pep3

mappings = MappingUtils.findMappingsForSequence(pep3, dnaMappings);

1448

assertEquals(1, mappings.size());

1449

assertEquals(1, mappings.get(0).getMappings().size());

1450

assertSame(pep3.getDatasetSequence(),

1451

mappings.get(0).getMappings().get(0).getMapping().getTo());

* dna1 to cds3

List<AlignedCodonFrame> dnaToCds3Mappings = MappingUtils

1457

.findMappingsForSequence(cds.get(2), dnaMappings);

1458

mapping = dnaToCds3Mappings.get(0).getMappings().get(0).getMapping();

1459

assertSame(cds.get(2).getDatasetSequence(), mapping.getTo());

1460

assertEquals("T(4) in CDS should map to T(10) in DNA", 10,

1461

mapping.getMap().getToPosition(4));

1462

}

1463

1464

@Test(groups = { "Functional" })

1465

public void testIsMappable()

1466

{

1467

SequenceI dna1 = new Sequence("dna1", "cgCAGtgGT");

1468

SequenceI aa1 = new Sequence("aa1", "RSG");

1469

AlignmentI al1 = new Alignment(new SequenceI[] { dna1 });

1470

AlignmentI al2 = new Alignment(new SequenceI[] { aa1 });

1471

1472

assertFalse(AlignmentUtils.isMappable(null, null));

1473

assertFalse(AlignmentUtils.isMappable(al1, null));

1474

assertFalse(AlignmentUtils.isMappable(null, al1));

1475

assertFalse(AlignmentUtils.isMappable(al1, al1));

1476

assertFalse(AlignmentUtils.isMappable(al2, al2));

1477

1478

assertTrue(AlignmentUtils.isMappable(al1, al2));

1479

assertTrue(AlignmentUtils.isMappable(al2, al1));

}

/**

* Test creating a mapping when the sequences involved do not start at residue

1484

* 1

1485

1486

* @throws IOException

1487

1488

@Test(groups = { "Functional" })

1489

public void testMapCdnaToProtein_forSubsequence() throws IOException

1490

{

1491

SequenceI prot = new Sequence("UNIPROT|V12345", "E-I--Q", 10, 12);

1492

prot.createDatasetSequence();

1493

1494

SequenceI dna = new Sequence("EMBL|A33333", "GAA--AT-C-CAG", 40, 48);

1495

dna.createDatasetSequence();

1496

1497

MapList map = AlignmentUtils.mapCdnaToProtein(prot, dna);

1498

assertEquals(10, map.getToLowest());

1499

assertEquals(12, map.getToHighest());

1500

assertEquals(40, map.getFromLowest());

1501

assertEquals(48, map.getFromHighest());

}

/**

* Test for the alignSequenceAs method where we have protein mapped to protein

1506

1507

@Test(groups = { "Functional" })

1508

public void testAlignSequenceAs_mappedProteinProtein()

1509

{

1510

1511

SequenceI alignMe = new Sequence("Match", "MGAASEV");

1512

alignMe.createDatasetSequence();

1513

SequenceI alignFrom = new Sequence("Query", "LQTGYMGAASEVMFSPTRR");

1514

alignFrom.createDatasetSequence();

1515

1516

AlignedCodonFrame acf = new AlignedCodonFrame();

1517

// this is like a domain or motif match of part of a peptide sequence

1518

MapList map = new MapList(new int[] { 6, 12 }, new int[] { 1, 7 }, 1,

1519

1);

1520

acf.addMap(alignFrom.getDatasetSequence(), alignMe.getDatasetSequence(),

1521

map);

1522

1523

AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "-", '-', true,

1524

true);

1525

assertEquals("-----MGAASEV-------", alignMe.getSequenceAsString());

}

/**

* Test for the alignSequenceAs method where there are trailing unmapped

1530

* residues in the model sequence

1531

1532

@Test(groups = { "Functional" })

1533

public void testAlignSequenceAs_withTrailingPeptide()

1534

{

1535

// map first 3 codons to KPF; G is a trailing unmapped residue

1536

MapList map = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3, 1);

1537

1538

checkAlignSequenceAs("AAACCCTTT", "K-PFG", true, true, map,

"AAA---CCCTTT---");

}

/**

* Tests for transferring features between mapped sequences

1544

1545

@Test(groups = { "Functional" })

1546

public void testTransferFeatures()

1547

{

1548

SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");

1549

SequenceI cds = new Sequence("cds/10-15", "TAGGCC");

1550

1551

// no overlap

1552

dna.addSequenceFeature(

1553

new SequenceFeature("type1", "desc1", 1, 2, 1f, null));

1554

// partial overlap - to [1, 1]

1555

dna.addSequenceFeature(

1556

new SequenceFeature("type2", "desc2", 3, 4, 2f, null));

1557

// exact overlap - to [1, 3]

1558

dna.addSequenceFeature(

1559

new SequenceFeature("type3", "desc3", 4, 6, 3f, null));

1560

// spanning overlap - to [2, 5]

1561

dna.addSequenceFeature(

1562

new SequenceFeature("type4", "desc4", 5, 11, 4f, null));

1563

// exactly overlaps whole mapped range [1, 6]

1564

dna.addSequenceFeature(

1565

new SequenceFeature("type5", "desc5", 4, 12, 5f, null));

1566

// no overlap (internal)

1567

dna.addSequenceFeature(

1568

new SequenceFeature("type6", "desc6", 7, 9, 6f, null));

1569

// no overlap (3' end)

1570

dna.addSequenceFeature(

1571

new SequenceFeature("type7", "desc7", 13, 15, 7f, null));

1572

// overlap (3' end) - to [6, 6]

1573

dna.addSequenceFeature(

1574

new SequenceFeature("type8", "desc8", 12, 12, 8f, null));

1575

// extended overlap - to [6, +]

1576

dna.addSequenceFeature(

1577

new SequenceFeature("type9", "desc9", 12, 13, 9f, null));

1578

1579

MapList map = new MapList(new int[] { 4, 6, 10, 12 },

new int[]

{ 1, 6 }, 1, 1);

* transferFeatures() will build 'partial overlap' for regions

1585

* that partially overlap 5' or 3' (start or end) of target sequence

1586

1587

AlignmentUtils.transferFeatures(dna, cds, map, null);

1588

List<SequenceFeature> sfs = cds.getSequenceFeatures();

1589

assertEquals(6, sfs.size());

1590

1591

SequenceFeature sf = sfs.get(0);

1592

assertEquals("type2", sf.getType());

1593

assertEquals("desc2", sf.getDescription());

1594

assertEquals(2f, sf.getScore());

1595

assertEquals(1, sf.getBegin());

1596

assertEquals(1, sf.getEnd());

1597

1598

sf = sfs.get(1);

1599

assertEquals("type3", sf.getType());

1600

assertEquals("desc3", sf.getDescription());

1601

assertEquals(3f, sf.getScore());

1602

assertEquals(1, sf.getBegin());

1603

assertEquals(3, sf.getEnd());

1604

1605

sf = sfs.get(2);

1606

assertEquals("type4", sf.getType());

1607

assertEquals(2, sf.getBegin());

1608

assertEquals(5, sf.getEnd());

1609

1610

sf = sfs.get(3);

1611

assertEquals("type5", sf.getType());

1612

assertEquals(1, sf.getBegin());

1613

assertEquals(6, sf.getEnd());

1614

1615

sf = sfs.get(4);

1616

assertEquals("type8", sf.getType());

1617

assertEquals(6, sf.getBegin());

1618

assertEquals(6, sf.getEnd());

1619

1620

sf = sfs.get(5);

1621

assertEquals("type9", sf.getType());

1622

assertEquals(6, sf.getBegin());

1623

assertEquals(6, sf.getEnd());

}

/**

* Tests for transferring features between mapped sequences

1628

1629

@Test(groups = { "Functional" })

1630

public void testTransferFeatures_withOmit()

1631

{

1632

SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");

1633

SequenceI cds = new Sequence("cds/10-15", "TAGGCC");

1634

1635

MapList map = new MapList(new int[] { 4, 6, 10, 12 },

new int[]

{ 1, 6 }, 1, 1);

// [5, 11] maps to [2, 5]

1640

dna.addSequenceFeature(

1641

new SequenceFeature("type4", "desc4", 5, 11, 4f, null));

1642

// [4, 12] maps to [1, 6]

1643

dna.addSequenceFeature(

1644

new SequenceFeature("type5", "desc5", 4, 12, 5f, null));

1645

// [12, 12] maps to [6, 6]

1646

dna.addSequenceFeature(

1647

new SequenceFeature("type8", "desc8", 12, 12, 8f, null));

1648

1649

// desc4 and desc8 are the 'omit these' varargs

1650

AlignmentUtils.transferFeatures(dna, cds, map, null, "type4", "type8");

1651

List<SequenceFeature> sfs = cds.getSequenceFeatures();

1652

assertEquals(1, sfs.size());

1653

1654

SequenceFeature sf = sfs.get(0);

1655

assertEquals("type5", sf.getType());

1656

assertEquals(1, sf.getBegin());

1657

assertEquals(6, sf.getEnd());

}

/**

* Tests for transferring features between mapped sequences

1662

1663

@Test(groups = { "Functional" })

1664

public void testTransferFeatures_withSelect()

1665

{

1666

SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");

1667

SequenceI cds = new Sequence("cds/10-15", "TAGGCC");

1668

1669

MapList map = new MapList(new int[] { 4, 6, 10, 12 },

new int[]

{ 1, 6 }, 1, 1);

// [5, 11] maps to [2, 5]

1674

dna.addSequenceFeature(

1675

new SequenceFeature("type4", "desc4", 5, 11, 4f, null));

1676

// [4, 12] maps to [1, 6]

1677

dna.addSequenceFeature(

1678

new SequenceFeature("type5", "desc5", 4, 12, 5f, null));

1679

// [12, 12] maps to [6, 6]

1680

dna.addSequenceFeature(

1681

new SequenceFeature("type8", "desc8", 12, 12, 8f, null));

1682

1683

// "type5" is the 'select this type' argument

1684

AlignmentUtils.transferFeatures(dna, cds, map, "type5");

1685

List<SequenceFeature> sfs = cds.getSequenceFeatures();

1686

assertEquals(1, sfs.size());

1687

1688

SequenceFeature sf = sfs.get(0);

1689

assertEquals("type5", sf.getType());

1690

assertEquals(1, sf.getBegin());

1691

assertEquals(6, sf.getEnd());

}

/**

* Test the method that extracts the cds-only part of a dna alignment, for the

1696

* case where the cds should be aligned to match its nucleotide sequence.

1697

1698

@Test(groups = { "Functional" })

1699

public void testMakeCdsAlignment_alternativeTranscripts()

1700

{

1701

SequenceI dna1 = new Sequence("dna1", "aaaGGGCC-----CTTTaaaGGG");

1702

// alternative transcript of same dna skips CCC codon

1703

SequenceI dna2 = new Sequence("dna2", "aaaGGGCC-----cttTaaaGGG");

1704

// dna3 has no mapping (protein product) so should be ignored here

1705

SequenceI dna3 = new Sequence("dna3", "aaaGGGCCCCCGGGcttTaaaGGG");

1706

SequenceI pep1 = new Sequence("pep1", "GPFG");

1707

SequenceI pep2 = new Sequence("pep2", "GPG");

1708

dna1.createDatasetSequence();

1709

dna2.createDatasetSequence();

1710

dna3.createDatasetSequence();

1711

pep1.createDatasetSequence();

1712

pep2.createDatasetSequence();

1713

1714

AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });

1715

dna.setDataset(null);

1716

1717

MapList map = new MapList(new int[] { 4, 12, 16, 18 },

1718

new int[]

1719

{ 1, 4 }, 3, 1);

1720

AlignedCodonFrame acf = new AlignedCodonFrame();

1721

acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);

1722

dna.addCodonFrame(acf);

1723

map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 },

1724

new int[]

1725

{ 1, 3 }, 3, 1);

1726

acf = new AlignedCodonFrame();

1727

acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);

1728

dna.addCodonFrame(acf);

1729

1730

AlignmentI cds = AlignmentUtils

1731

.makeCdsAlignment(new SequenceI[]

1732

{ dna1, dna2, dna3 }, dna.getDataset(), null);

1733

List<SequenceI> cdsSeqs = cds.getSequences();

1734

assertEquals(2, cdsSeqs.size());

1735

assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString());

1736

assertEquals("GGGCCTGGG", cdsSeqs.get(1).getSequenceAsString());

1737

1738

1739

* verify shared, extended alignment dataset

1740

1741

assertSame(dna.getDataset(), cds.getDataset());

1742

assertTrue(dna.getDataset().getSequences()

1743

.contains(cdsSeqs.get(0).getDatasetSequence()));

1744

assertTrue(dna.getDataset().getSequences()

1745

.contains(cdsSeqs.get(1).getDatasetSequence()));

1746

1747

1748

* Verify 6 mappings: dna1 to cds1, cds1 to pep1, dna1 to pep1

1749

* and the same for dna2/cds2/pep2

1750

1751

List<AlignedCodonFrame> mappings = cds.getCodonFrames();

1752

assertEquals(6, mappings.size());

1753

1754

1755

* 2 mappings involve pep1

1756

1757

List<AlignedCodonFrame> pep1Mappings = MappingUtils

1758

.findMappingsForSequence(pep1, mappings);

1759

assertEquals(2, pep1Mappings.size());

1760

1761

1762

* Get mapping of pep1 to cds1 and verify it

1763

* maps GPFG to 1-3,4-6,7-9,10-12

1764

1765

List<AlignedCodonFrame> pep1CdsMappings = MappingUtils

1766

.findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);

1767

assertEquals(1, pep1CdsMappings.size());

1768

SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1,

1769

pep1CdsMappings);

1770

assertEquals(1, sr.getResults().size());

1771

SearchResultMatchI m = sr.getResults().get(0);

1772

assertEquals(cds.getSequenceAt(0).getDatasetSequence(),

1773

m.getSequence());

1774

assertEquals(1, m.getStart());

1775

assertEquals(3, m.getEnd());

1776

sr = MappingUtils.buildSearchResults(pep1, 2, pep1CdsMappings);

1777

m = sr.getResults().get(0);

1778

assertEquals(4, m.getStart());

1779

assertEquals(6, m.getEnd());

1780

sr = MappingUtils.buildSearchResults(pep1, 3, pep1CdsMappings);

1781

m = sr.getResults().get(0);

1782

assertEquals(7, m.getStart());

1783

assertEquals(9, m.getEnd());

1784

sr = MappingUtils.buildSearchResults(pep1, 4, pep1CdsMappings);

1785

m = sr.getResults().get(0);

1786

assertEquals(10, m.getStart());

1787

assertEquals(12, m.getEnd());

1788

1789

1790

* Get mapping of pep2 to cds2 and verify it

1791

* maps GPG in pep2 to 1-3,4-6,7-9 in second CDS sequence

1792

1793

List<AlignedCodonFrame> pep2Mappings = MappingUtils

1794

.findMappingsForSequence(pep2, mappings);

1795

assertEquals(2, pep2Mappings.size());

1796

List<AlignedCodonFrame> pep2CdsMappings = MappingUtils

1797

.findMappingsForSequence(cds.getSequenceAt(1), pep2Mappings);

1798

assertEquals(1, pep2CdsMappings.size());

1799

sr = MappingUtils.buildSearchResults(pep2, 1, pep2CdsMappings);

1800

assertEquals(1, sr.getResults().size());

1801

m = sr.getResults().get(0);

1802

assertEquals(cds.getSequenceAt(1).getDatasetSequence(),

1803

m.getSequence());

1804

assertEquals(1, m.getStart());

1805

assertEquals(3, m.getEnd());

1806

sr = MappingUtils.buildSearchResults(pep2, 2, pep2CdsMappings);

1807

m = sr.getResults().get(0);

1808

assertEquals(4, m.getStart());

1809

assertEquals(6, m.getEnd());

1810

sr = MappingUtils.buildSearchResults(pep2, 3, pep2CdsMappings);

1811

m = sr.getResults().get(0);

1812

assertEquals(7, m.getStart());

1813

assertEquals(9, m.getEnd());

}

/**

* Test the method that realigns protein to match mapped codon alignment.

1818

1819

@Test(groups = { "Functional" })

1820

public void testAlignProteinAsDna_incompleteStartCodon()

1821

{

1822

// seq1: incomplete start codon (not mapped), then [3, 11]

1823

SequenceI dna1 = new Sequence("Seq1", "ccAAA-TTT-GGG-");

1824

// seq2 codons are [4, 5], [8, 11]

1825

SequenceI dna2 = new Sequence("Seq2", "ccaAA-ttT-GGG-");

1826

// seq3 incomplete start codon at 'tt'

1827

SequenceI dna3 = new Sequence("Seq3", "ccaaa-ttt-GGG-");

1828

AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });

1829

dna.setDataset(null);

1830

1831

// prot1 has 'X' for incomplete start codon (not mapped)

1832

SequenceI prot1 = new Sequence("Seq1", "XKFG"); // X for incomplete start

1833

SequenceI prot2 = new Sequence("Seq2", "NG");

1834

SequenceI prot3 = new Sequence("Seq3", "XG"); // X for incomplete start

1835

AlignmentI protein = new Alignment(

1836

new SequenceI[]

1837

{ prot1, prot2, prot3 });

1838

protein.setDataset(null);

1839

1840

// map dna1 [3, 11] to prot1 [2, 4] KFG

1841

MapList map = new MapList(new int[] { 3, 11 }, new int[] { 2, 4 }, 3,

1842

1);

1843

AlignedCodonFrame acf = new AlignedCodonFrame();

1844

acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);

1845

1846

// map dna2 [4, 5] [8, 11] to prot2 [1, 2] NG

1847

map = new MapList(new int[] { 4, 5, 8, 11 }, new int[] { 1, 2 }, 3, 1);

1848

acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);

1849

1850

// map dna3 [9, 11] to prot3 [2, 2] G

1851

map = new MapList(new int[] { 9, 11 }, new int[] { 2, 2 }, 3, 1);

1852

acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);

1853

1854

ArrayList<AlignedCodonFrame> acfs = new ArrayList<>();

1855

acfs.add(acf);

1856

protein.setCodonFrames(acfs);

1857

1858

1859

* verify X is included in the aligned proteins, and placed just

1860

* before the first mapped residue

1861

* CCT is between CCC and TTT

1862

1863

AlignmentUtils.alignProteinAsDna(protein, dna);

1864

assertEquals("XK-FG", prot1.getSequenceAsString());

1865

assertEquals("--N-G", prot2.getSequenceAsString());

1866

assertEquals("---XG", prot3.getSequenceAsString());

}

/**

* Tests for the method that maps the subset of a dna sequence that has CDS

1871

* (or subtype) feature - case where the start codon is incomplete.

1872

1873

@Test(groups = "Functional")

1874

public void testFindCdsPositions_fivePrimeIncomplete()

1875

{

1876

SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt");

1877

dnaSeq.createDatasetSequence();

1878

SequenceI ds = dnaSeq.getDatasetSequence();

1879

1880

// CDS for dna 5-6 (incomplete codon), 7-9

1881

SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);

1882

sf.setPhase("2"); // skip 2 bases to start of next codon

1883

ds.addSequenceFeature(sf);

1884

// CDS for dna 13-15

1885

sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null);

1886

ds.addSequenceFeature(sf);

1887

1888

List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);

1889

1890

1891

* check the mapping starts with the first complete codon

1892

1893

assertEquals(6, MappingUtils.getLength(ranges));

1894

assertEquals(2, ranges.size());

1895

assertEquals(7, ranges.get(0)[0]);

1896

assertEquals(9, ranges.get(0)[1]);

1897

assertEquals(13, ranges.get(1)[0]);

1898

assertEquals(15, ranges.get(1)[1]);

}

/**

* Tests for the method that maps the subset of a dna sequence that has CDS

1903

* (or subtype) feature.

1904

1905

@Test(groups = "Functional")

1906

public void testFindCdsPositions()

1907

{

1908

SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt");

1909

dnaSeq.createDatasetSequence();

1910

SequenceI ds = dnaSeq.getDatasetSequence();

1911

1912

// CDS for dna 10-12

1913

SequenceFeature sf = new SequenceFeature("CDS_predicted", "", 10, 12,

1914

0f, null);

1915

sf.setStrand("+");

1916

ds.addSequenceFeature(sf);

1917

// CDS for dna 4-6

1918

sf = new SequenceFeature("CDS", "", 4, 6, 0f, null);

1919

sf.setStrand("+");

1920

ds.addSequenceFeature(sf);

1921

// exon feature should be ignored here

1922

sf = new SequenceFeature("exon", "", 7, 9, 0f, null);

1923

ds.addSequenceFeature(sf);

1924

1925

List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);

1926

1927

* verify ranges { [4-6], [12-10] }

1928

* note CDS ranges are ordered ascending even if the CDS

1929

* features are not

1930

1931

assertEquals(6, MappingUtils.getLength(ranges));

1932

assertEquals(2, ranges.size());

1933

assertEquals(4, ranges.get(0)[0]);

1934

assertEquals(6, ranges.get(0)[1]);

1935

assertEquals(10, ranges.get(1)[0]);

1936

assertEquals(12, ranges.get(1)[1]);

}

/**

* Tests for the method that maps the subset of a dna sequence that has CDS

1941

* (or subtype) feature, with CDS strand = '-' (reverse)

1942

1943

// test turned off as currently findCdsPositions is not strand-dependent

1944

// left in case it comes around again...

1945

@Test(groups = "Functional", enabled = false)

1946

public void testFindCdsPositions_reverseStrand()

1947

{

1948

SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt");

1949

dnaSeq.createDatasetSequence();

1950

SequenceI ds = dnaSeq.getDatasetSequence();

1951

1952

// CDS for dna 4-6

1953

SequenceFeature sf = new SequenceFeature("CDS", "", 4, 6, 0f, null);

1954

sf.setStrand("-");

1955

ds.addSequenceFeature(sf);

1956

// exon feature should be ignored here

1957

sf = new SequenceFeature("exon", "", 7, 9, 0f, null);

1958

ds.addSequenceFeature(sf);

1959

// CDS for dna 10-12

1960

sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null);

1961

sf.setStrand("-");

1962

ds.addSequenceFeature(sf);

1963

1964

List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);

1965

1966

* verify ranges { [12-10], [6-4] }

1967

1968

assertEquals(6, MappingUtils.getLength(ranges));

1969

assertEquals(2, ranges.size());

1970

assertEquals(12, ranges.get(0)[0]);

1971

assertEquals(10, ranges.get(0)[1]);

1972

assertEquals(6, ranges.get(1)[0]);

1973

assertEquals(4, ranges.get(1)[1]);

}

/**

* Tests for the method that maps the subset of a dna sequence that has CDS

1978

* (or subtype) feature - reverse strand case where the start codon is

1979

* incomplete.

1980

1981

@Test(groups = "Functional", enabled = false)

1982

// test turned off as currently findCdsPositions is not strand-dependent

1983

// left in case it comes around again...

1984

public void testFindCdsPositions_reverseStrandThreePrimeIncomplete()

1985

{

1986

SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt");

1987

dnaSeq.createDatasetSequence();

1988

SequenceI ds = dnaSeq.getDatasetSequence();

1989

1990

// CDS for dna 5-9

1991

SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);

1992

sf.setStrand("-");

1993

ds.addSequenceFeature(sf);

1994

// CDS for dna 13-15

1995

sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null);

1996

sf.setStrand("-");

1997

sf.setPhase("2"); // skip 2 bases to start of next codon

1998

ds.addSequenceFeature(sf);

1999

2000

List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);

2001

2002

2003

* check the mapping starts with the first complete codon

2004

* expect ranges [13, 13], [9, 5]

2005

2006

assertEquals(6, MappingUtils.getLength(ranges));

2007

assertEquals(2, ranges.size());

2008

assertEquals(13, ranges.get(0)[0]);

2009

assertEquals(13, ranges.get(0)[1]);

2010

assertEquals(9, ranges.get(1)[0]);

2011

assertEquals(5, ranges.get(1)[1]);

2012

}

2013

2014

@Test(groups = "Functional")

2015

public void testAlignAs_alternateTranscriptsUngapped()

2016

{

2017

SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");

2018

SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");

2019

AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });

2020

((Alignment) dna).createDatasetAlignment();

2021

SequenceI cds1 = new Sequence("cds1", "GGGTTT");

2022

SequenceI cds2 = new Sequence("cds2", "CCCAAA");

2023

AlignmentI cds = new Alignment(new SequenceI[] { cds1, cds2 });

2024

((Alignment) cds).createDatasetAlignment();

2025

2026

AlignedCodonFrame acf = new AlignedCodonFrame();

2027

MapList map = new MapList(new int[] { 4, 9 }, new int[] { 1, 6 }, 1, 1);

2028

acf.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(), map);

2029

map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 6 }, 1, 1);

2030

acf.addMap(dna2.getDatasetSequence(), cds2.getDatasetSequence(), map);

2031

2032

2033

* verify CDS alignment is as:

2034

* cccGGGTTTaaa (cdna)

2035

* CCCgggtttAAA (cdna)

* ---GGGTTT--- (cds)

* CCC------AAA (cds)

dna.addCodonFrame(acf);

2041

AlignmentUtils.alignAs(cds, dna);

2042

assertEquals("---GGGTTT", cds.getSequenceAt(0).getSequenceAsString());

2043

assertEquals("CCC------AAA",

2044

cds.getSequenceAt(1).getSequenceAsString());

2045

}

2046

2047

@Test(groups = { "Functional" })

2048

public void testAddMappedPositions()

2049

{

2050

SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g");

2051

SequenceI seq1 = new Sequence("cds", "AAATTT");

2052

from.createDatasetSequence();

2053

seq1.createDatasetSequence();

2054

Mapping mapping = new Mapping(seq1,

2055

new MapList(new int[]

2056

{ 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1));

2057

Map<Integer, Map<SequenceI, Character>> map = new TreeMap<>();

2058

AlignmentUtils.addMappedPositions(seq1, from, mapping, map);

2059

2060

2061

* verify map has seq1 residues in columns 3,4,6,7,11,12

2062

2063

assertEquals(6, map.size());

2064

assertEquals('A', map.get(3).get(seq1).charValue());

2065

assertEquals('A', map.get(4).get(seq1).charValue());

2066

assertEquals('A', map.get(6).get(seq1).charValue());

2067

assertEquals('T', map.get(7).get(seq1).charValue());

2068

assertEquals('T', map.get(11).get(seq1).charValue());

2069

assertEquals('T', map.get(12).get(seq1).charValue());

}

/**

* Test case where the mapping 'from' range includes a stop codon which is

2078

* absent in the 'to' range

2079

2080

@Test(groups = { "Functional" })

2081

public void testAddMappedPositions_withStopCodon()

2082

{

2083

SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g");

2084

SequenceI seq1 = new Sequence("cds", "AAATTT");

2085

from.createDatasetSequence();

2086

seq1.createDatasetSequence();

2087

Mapping mapping = new Mapping(seq1,

2088

new MapList(new int[]

2089

{ 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1));

2090

Map<Integer, Map<SequenceI, Character>> map = new TreeMap<>();

2091

AlignmentUtils.addMappedPositions(seq1, from, mapping, map);

2092

2093

2094

* verify map has seq1 residues in columns 3,4,6,7,11,12

2095

2096

assertEquals(6, map.size());

2097

assertEquals('A', map.get(3).get(seq1).charValue());

2098

assertEquals('A', map.get(4).get(seq1).charValue());

2099

assertEquals('A', map.get(6).get(seq1).charValue());

2100

assertEquals('T', map.get(7).get(seq1).charValue());

2101

assertEquals('T', map.get(11).get(seq1).charValue());

2102

assertEquals('T', map.get(12).get(seq1).charValue());

}

/**

* Test for the case where the products for which we want CDS are specified.

2107

* This is to represent the case where EMBL has CDS mappings to both Uniprot

2108

* and EMBLCDSPROTEIN. makeCdsAlignment() should only return the mappings for

2109

* the protein sequences specified.

2110

2111

@Test(groups = { "Functional" })

2112

public void testMakeCdsAlignment_filterProducts()

2113

{

2114

SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");

2115

SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");

2116

SequenceI pep1 = new Sequence("Uniprot|pep1", "GF");

2117

SequenceI pep2 = new Sequence("Uniprot|pep2", "GFP");

2118

SequenceI pep3 = new Sequence("EMBL|pep3", "GF");

2119

SequenceI pep4 = new Sequence("EMBL|pep4", "GFP");

2120

dna1.createDatasetSequence();

2121

dna2.createDatasetSequence();

2122

pep1.createDatasetSequence();

2123

pep2.createDatasetSequence();

2124

pep3.createDatasetSequence();

2125

pep4.createDatasetSequence();

2126

AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });

2127

dna.setDataset(null);

2128

AlignmentI emblPeptides = new Alignment(new SequenceI[] { pep3, pep4 });

2129

emblPeptides.setDataset(null);

2130

2131

AlignedCodonFrame acf = new AlignedCodonFrame();

2132

MapList map = new MapList(new int[] { 4, 6, 10, 12 },

2133

new int[]

2134

{ 1, 2 }, 3, 1);

2135

acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);

2136

acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);

2137

dna.addCodonFrame(acf);

2138

2139

acf = new AlignedCodonFrame();

2140

map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },

2141

3, 1);

2142

acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);

2143

acf.addMap(dna2.getDatasetSequence(), pep4.getDatasetSequence(), map);

2144

dna.addCodonFrame(acf);

2145

2146

2147

* execute method under test to find CDS for EMBL peptides only

2148

2149

AlignmentI cds = AlignmentUtils

2150

.makeCdsAlignment(new SequenceI[]

2151

{ dna1, dna2 }, dna.getDataset(),

2152

emblPeptides.getSequencesArray());

2153

2154

assertEquals(2, cds.getSequences().size());

2155

assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());

2156

assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());

2157

2158

2159

* verify shared, extended alignment dataset

2160

2161

assertSame(dna.getDataset(), cds.getDataset());

2162

assertTrue(dna.getDataset().getSequences()

2163

.contains(cds.getSequenceAt(0).getDatasetSequence()));

2164

assertTrue(dna.getDataset().getSequences()

2165

.contains(cds.getSequenceAt(1).getDatasetSequence()));

2166

2167

2168

* Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide

2169

* the mappings are on the shared alignment dataset

2170

2171

List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();

2172

2173

* 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)

2174

2175

assertEquals(6, cdsMappings.size());

2176

2177

2178

* verify that mapping sets for dna and cds alignments are different

2179

* [not current behaviour - all mappings are on the alignment dataset]

2180

2181

// select -> subselect type to test.

2182

// Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());

2183

// assertEquals(4, dna.getCodonFrames().size());

2184

// assertEquals(4, cds.getCodonFrames().size());

2185

2186

2187

* Two mappings involve pep3 (dna to pep3, cds to pep3)

2188

* Mapping from pep3 to GGGTTT in first new exon sequence

2189

2190

List<AlignedCodonFrame> pep3Mappings = MappingUtils

2191

.findMappingsForSequence(pep3, cdsMappings);

2192

assertEquals(2, pep3Mappings.size());

2193

List<AlignedCodonFrame> mappings = MappingUtils

2194

.findMappingsForSequence(cds.getSequenceAt(0), pep3Mappings);

2195

assertEquals(1, mappings.size());

2196

2197

// map G to GGG

2198

SearchResultsI sr = MappingUtils.buildSearchResults(pep3, 1, mappings);

2199

assertEquals(1, sr.getResults().size());

2200

SearchResultMatchI m = sr.getResults().get(0);

2201

assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());

2202

assertEquals(1, m.getStart());

2203

assertEquals(3, m.getEnd());

2204

// map F to TTT

2205

sr = MappingUtils.buildSearchResults(pep3, 2, mappings);

2206

m = sr.getResults().get(0);

2207

assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());

2208

assertEquals(4, m.getStart());

2209

assertEquals(6, m.getEnd());

2210

2211

2212

* Two mappings involve pep4 (dna to pep4, cds to pep4)

2213

* Verify mapping from pep4 to GGGTTTCCC in second new exon sequence

2214

2215

List<AlignedCodonFrame> pep4Mappings = MappingUtils

2216

.findMappingsForSequence(pep4, cdsMappings);

2217

assertEquals(2, pep4Mappings.size());

2218

mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),

2219

pep4Mappings);

2220

assertEquals(1, mappings.size());

2221

// map G to GGG

2222

sr = MappingUtils.buildSearchResults(pep4, 1, mappings);

2223

assertEquals(1, sr.getResults().size());

2224

m = sr.getResults().get(0);

2225

assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());

2226

assertEquals(1, m.getStart());

2227

assertEquals(3, m.getEnd());

2228

// map F to TTT

2229

sr = MappingUtils.buildSearchResults(pep4, 2, mappings);

2230

m = sr.getResults().get(0);

2231

assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());

2232

assertEquals(4, m.getStart());

2233

assertEquals(6, m.getEnd());

2234

// map P to CCC

2235

sr = MappingUtils.buildSearchResults(pep4, 3, mappings);

2236

m = sr.getResults().get(0);

2237

assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());

2238

assertEquals(7, m.getStart());

2239

assertEquals(9, m.getEnd());

}

/**

* Test the method that just copies aligned sequences, provided all sequences

2244

* to be aligned share the aligned sequence's dataset

2245

2246

@Test(groups = "Functional")

2247

public void testAlignAsSameSequences()

2248

{

2249

SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");

2250

SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");

2251

AlignmentI al1 = new Alignment(new SequenceI[] { dna1, dna2 });

2252

((Alignment) al1).createDatasetAlignment();

2253

2254

SequenceI dna3 = new Sequence(dna1);

2255

SequenceI dna4 = new Sequence(dna2);

2256

assertSame(dna3.getDatasetSequence(), dna1.getDatasetSequence());

2257

assertSame(dna4.getDatasetSequence(), dna2.getDatasetSequence());

2258

String seq1 = "-cc-GG-GT-TT--aaa";

2259

dna3.setSequence(seq1);

2260

String seq2 = "C--C-Cgg--gtt-tAA-A-";

2261

dna4.setSequence(seq2);

2262

AlignmentI al2 = new Alignment(new SequenceI[] { dna3, dna4 });

2263

((Alignment) al2).createDatasetAlignment();

2264

2265

2266

* alignment removes gapped columns (two internal, two trailing)

2267

2268

assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));

2269

String aligned1 = "-cc-GG-GTTT-aaa";

2270

assertEquals(aligned1, al1.getSequenceAt(0).getSequenceAsString());

2271

String aligned2 = "C--C-Cgg-gtttAAA";

2272

assertEquals(aligned2, al1.getSequenceAt(1).getSequenceAsString());

2273

2274

2275

* add another sequence to 'aligned' - should still succeed, since

2276

* unaligned sequences still share a dataset with aligned sequences

2277

2278

SequenceI dna5 = new Sequence("dna5", "CCCgggtttAAA");

2279

dna5.createDatasetSequence();

2280

al2.addSequence(dna5);

2281

assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));

2282

assertEquals(aligned1, al1.getSequenceAt(0).getSequenceAsString());

2283

assertEquals(aligned2, al1.getSequenceAt(1).getSequenceAsString());

2284

2285

2286

* add another sequence to 'unaligned' - should fail, since now not

2287

* all unaligned sequences share a dataset with aligned sequences

2288

2289

SequenceI dna6 = new Sequence("dna6", "CCCgggtttAAA");

2290

dna6.createDatasetSequence();

2291

al1.addSequence(dna6);

2292

// JAL-2110 JBP Comment: what's the use case for this behaviour ?

2293

assertFalse(AlignmentUtils.alignAsSameSequences(al1, al2));

2294

}

2295

2296

@Test(groups = "Functional")

2297

public void testAlignAsSameSequencesMultipleSubSeq()

2298

{

2299

SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");

2300

SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");

2301

SequenceI as1 = dna1.deriveSequence(); // cccGGGTTTaaa/1-12

2302

SequenceI as2 = dna1.deriveSequence().getSubSequence(3, 7); // GGGT/4-7

2303

SequenceI as3 = dna2.deriveSequence(); // CCCgggtttAAA/1-12

2304

as1.insertCharAt(6, 5, '-');

2305

assertEquals("cccGGG-----TTTaaa", as1.getSequenceAsString());

2306

as2.insertCharAt(6, 5, '-');

2307

assertEquals("GGGT-----", as2.getSequenceAsString());

2308

as3.insertCharAt(3, 5, '-');

2309

assertEquals("CCC-----gggtttAAA", as3.getSequenceAsString());

2310

AlignmentI aligned = new Alignment(new SequenceI[] { as1, as2, as3 });

2311

2312

// why do we need to cast this still ?

2313

((Alignment) aligned).createDatasetAlignment();

2314

SequenceI uas1 = dna1.deriveSequence();

2315

SequenceI uas2 = dna1.deriveSequence().getSubSequence(3, 7);

2316

SequenceI uas3 = dna2.deriveSequence();

2317

AlignmentI tobealigned = new Alignment(

2318

new SequenceI[]

2319

{ uas1, uas2, uas3 });

2320

((Alignment) tobealigned).createDatasetAlignment();

2321

2322

2323

* alignAs lines up dataset sequences and removes empty columns (two)

2324

2325

assertTrue(AlignmentUtils.alignAsSameSequences(tobealigned, aligned));

2326

assertEquals("cccGGG---TTTaaa", uas1.getSequenceAsString());

2327

assertEquals("GGGT", uas2.getSequenceAsString());

2328

assertEquals("CCC---gggtttAAA", uas3.getSequenceAsString());

2329

}

2330

2331

@Test(groups = { "Functional" })

2332

public void testTransferGeneLoci()

2333

{

2334

SequenceI from = new Sequence("transcript",

2335

"aaacccgggTTTAAACCCGGGtttaaacccgggttt");

2336

SequenceI to = new Sequence("CDS", "TTTAAACCCGGG");

2337

MapList map = new MapList(new int[] { 1, 12 }, new int[] { 10, 21 }, 1,

1);

* first with nothing to transfer

2342

2343

AlignmentUtils.transferGeneLoci(from, map, to);

2344

assertNull(to.getGeneLoci());

2345

2346

2347

* next with gene loci set on 'from' sequence

2348

2349

int[] exons = new int[] { 100, 105, 155, 164, 210, 229 };

2350

MapList geneMap = new MapList(new int[] { 1, 36 }, exons, 1, 1);

2351

from.setGeneLoci("human", "GRCh38", "7", geneMap);

2352

AlignmentUtils.transferGeneLoci(from, map, to);

2353

2354

GeneLociI toLoci = to.getGeneLoci();

2355

assertNotNull(toLoci);

2356

// DBRefEntry constructor upper-cases 'source'

2357

assertEquals("HUMAN", toLoci.getSpeciesId());

2358

assertEquals("GRCh38", toLoci.getAssemblyId());

2359

assertEquals("7", toLoci.getChromosomeId());

2360

2361

2362

* transcript 'exons' are 1-6, 7-16, 17-36

2363

* CDS 1:12 is transcript 10-21

2364

* transcript 'CDS' is 10-16, 17-21

2365

* which is 'gene' 158-164, 210-214

2366

2367

MapList toMap = toLoci.getMapping();

2368

assertEquals(1, toMap.getFromRanges().size());

2369

assertEquals(2, toMap.getFromRanges().get(0).length);

2370

assertEquals(1, toMap.getFromRanges().get(0)[0]);

2371

assertEquals(12, toMap.getFromRanges().get(0)[1]);

2372

assertEquals(2, toMap.getToRanges().size());

2373

assertEquals(2, toMap.getToRanges().get(0).length);

2374

assertEquals(158, toMap.getToRanges().get(0)[0]);

2375

assertEquals(164, toMap.getToRanges().get(0)[1]);

2376

assertEquals(210, toMap.getToRanges().get(1)[0]);

2377

assertEquals(214, toMap.getToRanges().get(1)[1]);

2378

// or summarised as (but toString might change in future):

2379

assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]",

toMap.toString());

* an existing value is not overridden

2384

2385

geneMap = new MapList(new int[] { 1, 36 }, new int[] { 36, 1 }, 1, 1);

2386

from.setGeneLoci("inhuman", "GRCh37", "6", geneMap);

2387

AlignmentUtils.transferGeneLoci(from, map, to);

2388

assertEquals("GRCh38", toLoci.getAssemblyId());

2389

assertEquals("7", toLoci.getChromosomeId());

2390

toMap = toLoci.getMapping();

2391

assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]",

toMap.toString());

}

/**

* Tests for the method that maps nucleotide to protein based on CDS features

2397

2398

@Test(groups = "Functional")

2399

public void testMapCdsToProtein()

2400

{

2401

SequenceI peptide = new Sequence("pep", "KLQ");

2402

2403

2404

* Case 1: CDS 3 times length of peptide

2405

* NB method only checks lengths match, not translation

2406

2407

SequenceI dna = new Sequence("dna", "AACGacgtCTCCT");

2408

dna.createDatasetSequence();

2409

dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));

2410

dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 13, null));

2411

MapList ml = AlignmentUtils.mapCdsToProtein(dna, peptide);

2412

assertEquals(3, ml.getFromRatio());

2413

assertEquals(1, ml.getToRatio());

2414

assertEquals("[[1, 3]]",

2415

Arrays.deepToString(ml.getToRanges().toArray()));

2416

assertEquals("[[1, 4], [9, 13]]",

2417

Arrays.deepToString(ml.getFromRanges().toArray()));

2418

2419

2420

* Case 2: CDS 3 times length of peptide + stop codon

2421

* (note code does not currently check trailing codon is a stop codon)

2422

2423

dna = new Sequence("dna", "AACGacgtCTCCTCCC");

2424

dna.createDatasetSequence();

2425

dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));

2426

dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 16, null));

2427

ml = AlignmentUtils.mapCdsToProtein(dna, peptide);

2428

assertEquals(3, ml.getFromRatio());

2429

assertEquals(1, ml.getToRatio());

2430

assertEquals("[[1, 3]]",

2431

Arrays.deepToString(ml.getToRanges().toArray()));

2432

assertEquals("[[1, 4], [9, 13]]",

2433

Arrays.deepToString(ml.getFromRanges().toArray()));

2434

2435

2436

* Case 3: CDS longer than 3 * peptide + stop codon - no mapping is made

2437

2438

dna = new Sequence("dna", "AACGacgtCTCCTTGATCA");

2439

dna.createDatasetSequence();

2440

dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));

2441

dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 19, null));

2442

ml = AlignmentUtils.mapCdsToProtein(dna, peptide);

assertNull(ml);

* Case 4: CDS shorter than 3 * peptide - no mapping is made

2447

2448

dna = new Sequence("dna", "AACGacgtCTCC");

2449

dna.createDatasetSequence();

2450

dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));

2451

dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 12, null));

2452

ml = AlignmentUtils.mapCdsToProtein(dna, peptide);

assertNull(ml);

* Case 5: CDS 3 times length of peptide + part codon - mapping is truncated

2457

2458

dna = new Sequence("dna", "AACGacgtCTCCTTG");

2459

dna.createDatasetSequence();

2460

dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));

2461

dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, null));

2462

ml = AlignmentUtils.mapCdsToProtein(dna, peptide);

2463

assertEquals(3, ml.getFromRatio());

2464

assertEquals(1, ml.getToRatio());

2465

assertEquals("[[1, 3]]",

2466

Arrays.deepToString(ml.getToRanges().toArray()));

2467

assertEquals("[[1, 4], [9, 13]]",

2468

Arrays.deepToString(ml.getFromRanges().toArray()));

2469

2470

2471

* Case 6: incomplete start codon corresponding to X in peptide

2472

2473

dna = new Sequence("dna", "ACGacgtCTCCTTGG");

2474

dna.createDatasetSequence();

2475

SequenceFeature sf = new SequenceFeature("CDS", "", 1, 3, null);

2476

sf.setPhase("2"); // skip 2 positions (AC) to start of next codon (GCT)

2477

dna.addSequenceFeature(sf);

2478

dna.addSequenceFeature(new SequenceFeature("CDS", "", 8, 15, null));

2479

peptide = new Sequence("pep", "XLQ");

2480

ml = AlignmentUtils.mapCdsToProtein(dna, peptide);

2481

assertEquals("[[2, 3]]",

2482

Arrays.deepToString(ml.getToRanges().toArray()));

2483

assertEquals("[[3, 3], [8, 12]]",

2484

Arrays.deepToString(ml.getFromRanges().toArray()));

}

/**

* Tests for the method that locates the CDS sequence that has a mapping to

2489

* the given protein. That is, given a transcript-to-peptide mapping, find the

2490

* cds-to-peptide mapping that relates to both, and return the CDS sequence.

2491

2492

@Test(groups = "Functional")

2493

public void testFindCdsForProtein()

2494

{

2495

List<AlignedCodonFrame> mappings = new ArrayList<>();

2496

AlignedCodonFrame acf1 = new AlignedCodonFrame();

2497

mappings.add(acf1);

2498

2499

SequenceI dna1 = new Sequence("dna1", "cgatATcgGCTATCTATGacg");

2500

dna1.createDatasetSequence();

2501

2502

// NB we currently exclude STOP codon from CDS sequences

2503

// the test would need to change if this changes in future

2504

SequenceI cds1 = new Sequence("cds1", "ATGCTATCT");

2505

cds1.createDatasetSequence();

2506

2507

SequenceI pep1 = new Sequence("pep1", "MLS");

2508

pep1.createDatasetSequence();

2509

List<AlignedCodonFrame> seqMappings = new ArrayList<>();

2510

MapList mapList = new MapList(new int[] { 5, 6, 9, 15 },

2511

new int[]

2512

{ 1, 3 }, 3, 1);

2513

Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList);

2514

2515

// add dna to peptide mapping

2516

seqMappings.add(acf1);

2517

acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),

mapList);

* first case - no dna-to-CDS mapping exists - search fails

2522

2523

SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1,

2524

seqMappings, dnaToPeptide);

assertNull(seq);

* second case - CDS-to-peptide mapping exists but no dna-to-CDS

2529

* - search fails

2530

2531

// todo this test fails if the mapping is added to acf1, not acf2

2532

// need to tidy up use of lists of mappings in AlignedCodonFrame

2533

AlignedCodonFrame acf2 = new AlignedCodonFrame();

2534

mappings.add(acf2);

2535

MapList cdsToPeptideMapping = new MapList(new int[] { 1, 9 },

2536

new int[]

2537

{ 1, 3 }, 3, 1);

2538

acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(),

2539

cdsToPeptideMapping);

2540

assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,

dnaToPeptide));

* third case - add dna-to-CDS mapping - CDS is now found!

2545

2546

MapList dnaToCdsMapping = new MapList(new int[] { 5, 6, 9, 15 },

2547

new int[]

2548

{ 1, 9 }, 1, 1);

2549

acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(),

2550

dnaToCdsMapping);

2551

seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,

2552

dnaToPeptide);

2553

assertSame(seq, cds1.getDatasetSequence());

}

/**

* Tests for the method that locates the CDS sequence that has a mapping to

2558

* the given protein. That is, given a transcript-to-peptide mapping, find the

2559

* cds-to-peptide mapping that relates to both, and return the CDS sequence.

2560

* This test is for the case where transcript and CDS are the same length.

2561

2562

@Test(groups = "Functional")

2563

public void testFindCdsForProtein_noUTR()

2564

{

2565

List<AlignedCodonFrame> mappings = new ArrayList<>();

2566

AlignedCodonFrame acf1 = new AlignedCodonFrame();

2567

mappings.add(acf1);

2568

2569

SequenceI dna1 = new Sequence("dna1", "ATGCTATCTTAA");

2570

dna1.createDatasetSequence();

2571

2572

// NB we currently exclude STOP codon from CDS sequences

2573

// the test would need to change if this changes in future

2574

SequenceI cds1 = new Sequence("cds1", "ATGCTATCT");

2575

cds1.createDatasetSequence();

2576

2577

SequenceI pep1 = new Sequence("pep1", "MLS");

2578

pep1.createDatasetSequence();

2579

List<AlignedCodonFrame> seqMappings = new ArrayList<>();

2580

MapList mapList = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3,

2581

1);

2582

Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList);

2583

2584

// add dna to peptide mapping

2585

seqMappings.add(acf1);

2586

acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),

mapList);

* first case - transcript lacks CDS features - it appears to be

2591

* the CDS sequence and is returned

2592

2593

SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1,

2594

seqMappings, dnaToPeptide);

2595

assertSame(seq, dna1.getDatasetSequence());

2596

2597

2598

* second case - transcript has CDS feature - this means it is

2599

* not returned as a match for CDS (CDS sequences don't have CDS features)

2600

2601

dna1.addSequenceFeature(

2602

new SequenceFeature(SequenceOntologyI.CDS, "cds", 1, 12, null));

2603

seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,

dnaToPeptide);

assertNull(seq);

* third case - CDS-to-peptide mapping exists but no dna-to-CDS

2609

* - search fails

2610

2611

// todo this test fails if the mapping is added to acf1, not acf2

2612

// need to tidy up use of lists of mappings in AlignedCodonFrame

2613

AlignedCodonFrame acf2 = new AlignedCodonFrame();

2614

mappings.add(acf2);

2615

MapList cdsToPeptideMapping = new MapList(new int[] { 1, 9 },

2616

new int[]

2617

{ 1, 3 }, 3, 1);

2618

acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(),

2619

cdsToPeptideMapping);

2620

assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,

dnaToPeptide));

* fourth case - add dna-to-CDS mapping - CDS is now found!

2625

2626

MapList dnaToCdsMapping = new MapList(new int[] { 1, 9 },

2627

new int[]

2628

{ 1, 9 }, 1, 1);

2629

acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(),

2630

dnaToCdsMapping);

2631

seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,

2632

dnaToPeptide);

2633

assertSame(seq, cds1.getDatasetSequence());

2634

}

2635

2636

@Test(groups = "Functional")

2637

public void testAddReferenceAnnotations()

2638

{

2639

SequenceI longseq = new Sequence("longA", "ASDASDASDASDAASDASDASDASDA");

2640

Annotation[] aa = new Annotation[longseq.getLength()];

2641

2642

for (int p = 0; p < aa.length; p++)

2643

{

2644

aa[p] = new Annotation("P", "pos " + (p + 1), (char) 0,

2645

(float) p + 1);

2646

}

2647

AlignmentAnnotation refAnnot = new AlignmentAnnotation("LongSeqAnnot",

2648

"Annotations", aa);

2649

refAnnot.setCalcId("Test");

2650

longseq.addAlignmentAnnotation(refAnnot);

2651

verifyExpectedSequenceAnnotation(refAnnot);

2652

2653

Alignment ourAl = new Alignment(

2654

new SequenceI[]

2655

{ longseq.getSubSequence(5, 10),

2656

longseq.getSubSequence(7, 12) });

2657

ourAl.createDatasetAlignment();

2658

2659

// transfer annotation

2660

SortedMap<String, String> tipEntries = new TreeMap<>();

2661

Map<SequenceI, List<AlignmentAnnotation>> candidates = new LinkedHashMap<>();

2662

2663

AlignmentUtils.findAddableReferenceAnnotations(ourAl.getSequences(),

2664

tipEntries, candidates, ourAl);

2665

AlignmentUtils.addReferenceAnnotations(candidates, ourAl, null);

2666

2667

assertNotNull(ourAl.getAlignmentAnnotation());

2668

assertEquals(ourAl.getAlignmentAnnotation().length, 2);

2669

2670

for (AlignmentAnnotation alan : ourAl.getAlignmentAnnotation())

2671

{

2672

verifyExpectedSequenceAnnotation(alan);

2673

}

2674

// Everything above works for 2.11.3 and 2.11.2.x.

2675

// now simulate copy/paste to new alignment

2676

SequenceI[] newSeqAl = new SequenceI[2];

2677

// copy sequences but no annotation

2678

newSeqAl[0] = new Sequence(ourAl.getSequenceAt(0),

2679

ourAl.getSequenceAt(0).getAnnotation());

2680

newSeqAl[1] = new Sequence(ourAl.getSequenceAt(1),

2681

ourAl.getSequenceAt(1).getAnnotation());

2682

2683

Alignment newAl = new Alignment(newSeqAl);

2684

// delete annotation

2685

for (SequenceI sq : newAl.getSequences())

2686

{

2687

sq.setAlignmentAnnotation(new AlignmentAnnotation[0]);

2688

}

2689

// JAL-4182 scenario test

2690

SequenceGroup sg = new SequenceGroup(Arrays.asList(newSeqAl));

2691

sg.setStartRes(0);

2692

sg.setEndRes(newAl.getWidth());

2693

AlignmentUtils.addReferenceAnnotationTo(newAl, newSeqAl[0],

2694

newSeqAl[0].getDatasetSequence().getAnnotation()[0], sg);

2695

AlignmentUtils.addReferenceAnnotationTo(newAl, newSeqAl[1],

2696

newSeqAl[1].getDatasetSequence().getAnnotation()[0], sg);

2697

for (AlignmentAnnotation alan : newAl.getAlignmentAnnotation())

2698

{

2699

verifyExpectedSequenceAnnotation(alan);

}

}

/**

* helper - tests annotation is mapped to position it was originally created

* for

* @param alan

private void verifyExpectedSequenceAnnotation(AlignmentAnnotation alan)

2710

{

2711

for (int c = 0; c < alan.annotations.length; c++)

2712

{

2713

Annotation a = alan.annotations[c];

2714

if (a != null)

2715

{

2716

assertEquals("Misaligned annotation at " + c,

2717

(float) alan.sequenceRef.findPosition(c), a.value);

}

else

{

assertTrue("Unexpected Null at position " + c,

2722

c >= alan.sequenceRef.getLength()

2723

|| Comparison.isGap(alan.sequenceRef.getCharAt(c)));

}

}

}

@Test(groups = "Functional")

2729

public void testAddReferenceContactMap()

2730

{

2731

SequenceI sq = new Sequence("a", "SSSQ");

2732

ContactMatrixI cm = new SeqDistanceContactMatrix(4);

2733

AlignmentAnnotation cm_aan = sq.addContactList(cm);

2734

cm_aan.description = cm_aan.description + " cm1";

2735

SequenceI dssq = sq.createDatasetSequence();

2736

2737

// remove annotation on our non-dataset sequence

2738

sq.removeAlignmentAnnotation(sq.getAnnotation()[0]);

2739

// test transfer

2740

Alignment al = new Alignment(new SequenceI[] { sq });

2741

SortedMap<String, String> tipEntries = new TreeMap<>();

2742

Map<SequenceI, List<AlignmentAnnotation>> candidates = new LinkedHashMap<>();

2743

2744

AlignmentUtils.findAddableReferenceAnnotations(al.getSequences(),

2745

tipEntries, candidates, al);

2746

AlignmentUtils.addReferenceAnnotations(candidates, al, null);

2747

assertTrue("No contact map annotation transferred",

2748

al.getAlignmentAnnotation() != null

2749

&& al.getAlignmentAnnotation().length == 1);

2750

AlignmentAnnotation alan = al.findAnnotations(sq, null, cm_aan.label)

2751

.iterator().next();

2752

ContactMatrixI t_cm = al.getContactMatrixFor(alan);

2753

assertNotNull("No contact map for the transferred annotation row.",

2754

t_cm);

2755

assertTrue(t_cm instanceof SeqDistanceContactMatrix);

2756

assertTrue(((SeqDistanceContactMatrix) t_cm).hasReferenceSeq());

2757

2758

ContactListI cl = al.getContactListFor(alan, 1);

2759

assertNotNull(

2760

"No contact matrix recovered after reference annotation transfer",

2761

cl);

2762

// semantics of sequence associated contact list is slightly tricky - column

2763

// 3 in alignment should have data

2764

cl = al.getContactListFor(alan, 3);

2765

assertNotNull(

2766

"Contact matrix should have data for last position in sequence",

2767

cl);

2768

2769

ContactMatrixI cm2 = new SeqDistanceContactMatrix(4);

2770

dssq.addContactList(cm2);

2771

tipEntries = new TreeMap<>();

2772

candidates = new LinkedHashMap<>();

2773

2774

AlignmentUtils.findAddableReferenceAnnotations(al.getSequences(),

2775

tipEntries, candidates, al);

2776

AlignmentUtils.addReferenceAnnotations(candidates, al, null);

2777

assertTrue("Expected two contact map annotation transferred",

2778

al.getAlignmentAnnotation() != null

2779

&& al.getAlignmentAnnotation().length == 2);

}

@Test(

groups = "Functional",

2785

dataProvider = "SecondaryStructureAnnotations")

2786

public void testSecondaryStructurePresentAndSources(

2787

AlignmentAnnotation[] annotations, boolean expectedSSPresent,

2788

ArrayList<String> expectedSSSources)

2789

{

2790

Assert.assertEquals(expectedSSPresent,

2791

AlignmentUtils.isSecondaryStructurePresent(annotations));

2792

}

2793

2794

@DataProvider(name = "SecondaryStructureAnnotations")

2795

public static Object[][] provideSecondaryStructureAnnotations()

2796

{

2797

AlignmentAnnotation ann1 = new AlignmentAnnotation(

2798

"Secondary Structure", "Secondary Structure",

2799

new Annotation[] {});

2800

AlignmentAnnotation ann2 = new AlignmentAnnotation("jnetpred",

2801

"jnetpred", new Annotation[] {});

2802

AlignmentAnnotation ann3 = new AlignmentAnnotation("Temp", "Temp",

2803

new Annotation[] {});

2804

AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "Temp",

2805

new Annotation[] {});

2806

2807

List<String> ssSources1 = new ArrayList<>(

2808

Arrays.asList("3D Structures"));

2809

List<String> ssSources2 = new ArrayList<>(Arrays.asList("JPred"));

2810

List<String> ssSources3 = new ArrayList<>(

2811

Arrays.asList("3D Structures", "JPred"));

2812

List<String> ssSources4 = new ArrayList<>();

2813

2814

return new Object[][] {

2815

{ new AlignmentAnnotation[]

2816

{ ann1, ann3, ann4 }, true, ssSources1 },

2817

{ new AlignmentAnnotation[]

2818

{ ann2, ann3, ann4 }, true, ssSources2 },

2819

{ new AlignmentAnnotation[]

2820

{ ann3, ann4 }, false, ssSources4 },

2821

{ new AlignmentAnnotation[] {}, false, ssSources4 },

2822

{ new AlignmentAnnotation[]

2823

{ ann1, ann2, ann3, ann4 }, true, ssSources3 } };

2824

}

2825

2826

@Test(dataProvider = "SecondaryStructureAnnotationColours")

2827

public void testSecondaryStructureAnnotationColour(char symbol,

2828

Color expectedColor)

2829

{

2830

Color actualColor = AlignmentUtils

2831

.getSecondaryStructureAnnotationColour(symbol);

2832

Assert.assertEquals(actualColor, expectedColor);

2833

}

2834

2835

@DataProvider(name = "SecondaryStructureAnnotationColours")

2836

public static Object[][] provideSecondaryStructureAnnotationColours()

2837

{

2838

return new Object[][] { { 'C', Color.gray }, { 'E', Color.green },

2839

{ 'H', Color.red },

2840

{ '-', Color.white } };

2841

}

2842

2843

@Test(dataProvider = "SSAnnotationPresence")

2844

public void testIsSSAnnotationPresent(

2845

Map<SequenceI, List<AlignmentAnnotation>> annotations,

2846

boolean expectedPresence)

2847

{

2848

boolean actualPresence = AlignmentUtils

2849

.isSSAnnotationPresent(annotations);

2850

Assert.assertEquals(actualPresence, expectedPresence);

2851

}

2852

2853

@DataProvider(name = "SSAnnotationPresence")

2854

public static Object[][] provideSSAnnotationPresence()

2855

{

2856

Map<SequenceI, List<AlignmentAnnotation>> annotations1 = new HashMap<>();

2857

SequenceI seq1 = new Sequence("Seq1", "ASD---ASD---ASD", 37, 45);

2858

List<AlignmentAnnotation> annotationsList1 = new ArrayList<>();

2859

annotationsList1.add(new AlignmentAnnotation("Secondary Structure",

2860

"Secondary Structure", new Annotation[] {}));

2861

annotations1.put(seq1, annotationsList1); // Annotation present secondary

2862

// structure for seq1

2863

2864

Map<SequenceI, List<AlignmentAnnotation>> annotations2 = new HashMap<>();

2865

SequenceI seq2 = new Sequence("Seq2", "ASD---ASD------", 37, 42);

2866

List<AlignmentAnnotation> annotationsList2 = new ArrayList<>();

2867

annotationsList2.add(new AlignmentAnnotation("Other Annotation",

2868

"Other Annotation", new Annotation[] {}));

2869

annotations2.put(seq2, annotationsList2); // Annotation not related to any

2870

// of secondary structure for seq2

2871

2872

Map<SequenceI, List<AlignmentAnnotation>> annotations3 = new HashMap<>();

2873

// Empty annotation map

2874

2875

Map<SequenceI, List<AlignmentAnnotation>> annotations4 = new HashMap<>();

2876

SequenceI seq4 = new Sequence("Seq4", "ASD---ASD---AS-", 37, 44);

2877

List<AlignmentAnnotation> annotationsList4 = new ArrayList<>();

2878

annotationsList4.add(new AlignmentAnnotation("jnetpred", "jnetpred",

2879

new Annotation[] {}));

2880

annotations4.put(seq4, annotationsList4); // Annotation present from JPred

2881

// for seq4

2882

2883

return new Object[][] { { annotations1, true }, // Annotations present

2884

// secondary structure

2885

// present

2886

{ annotations2, false }, // No annotations related to any of the

2887

// secondary structure present

2888

{ annotations3, false }, // Empty annotation map

2889

{ annotations4, true }, // Annotations present from JPred secondary

// structure present

};

}

@DataProvider(name = "SSSourceFromAnnotationDescription")

2895

public static Object[][] provideSSSourceFromAnnotationDescription()

2896

{

2897

Map<SequenceI, List<AlignmentAnnotation>> annotations1 = new HashMap<>();

2898

SequenceI seq1 = new Sequence("Seq1", "ASD---ASD---ASD", 37, 45);

2899

List<AlignmentAnnotation> annotationsList1 = new ArrayList<>();

2900

annotationsList1.add(new AlignmentAnnotation("jnetpred", "JPred Output",

2901

new Annotation[] {}));

2902

annotations1.put(seq1, annotationsList1); // Annotation present from JPred

2903

// for seq1

2904

2905

Map<SequenceI, List<AlignmentAnnotation>> annotations2 = new HashMap<>();

2906

SequenceI seq2 = new Sequence("Seq2", "ASD---ASD------", 37, 42);

2907

List<AlignmentAnnotation> annotationsList2 = new ArrayList<>();

2908

annotationsList2.add(new AlignmentAnnotation("Secondary Structure",

2909

"Secondary Structure for af-q43517-f1A", new Annotation[] {}));

2910

annotations2.put(seq2, annotationsList2); // Annotation present secondary

2911

// structure from Alphafold for

2912

// seq2

2913

2914

Map<SequenceI, List<AlignmentAnnotation>> annotations3 = new HashMap<>();

2915

// Empty annotation map

2916

2917

Map<SequenceI, List<AlignmentAnnotation>> annotations4 = new HashMap<>();

2918

SequenceI seq4 = new Sequence("Seq4", "ASD---ASD---AS-", 37, 44);

2919

List<AlignmentAnnotation> annotationsList4 = new ArrayList<>();

2920

annotationsList4.add(new AlignmentAnnotation("Secondary Structure",

2921

"Secondary Structure for 4zhpA", new Annotation[] {}));

2922

annotations4.put(seq4, annotationsList4); // Annotation present secondary

2923

// structure from pdb for seq4

2924

2925

Map<SequenceI, List<AlignmentAnnotation>> annotations5 = new HashMap<>();

2926

SequenceI seq5 = new Sequence("Seq5", "ASD---ASD---AS-", 37, 44);

2927

List<AlignmentAnnotation> annotationsList5 = new ArrayList<>();

2928

annotationsList5.add(new AlignmentAnnotation("Secondary Structure",

2929

"Secondary Structure for p09911_54-147__3a7wzn.1.p3502557454997462030P",

2930

new Annotation[] {}));

2931

annotations5.put(seq5, annotationsList5); // Annotation present secondary

2932

// structure from Swiss model for

2933

// seq5

2934

2935

// JPred Output - JPred

2936

// Secondary Structure for af-q43517-f1A - Alphafold

2937

// Secondary Structure for 4zhpA - Experimental

2938

// Secondary Structure for p09911_54-147__3a7wzn.1.p3502557454997462030P -

2939

// Swiss Model

2940

2941

return new Object[][] { { annotations1, "JPred" },

2942

{ annotations2, "Alphafold" },

2943

{ annotations3, null },

2944

{ annotations4, "PDB" },

2945

{ annotations5, "Swiss Model" } };

2946

}

2947

2948

}

Coverage Report

File AlignmentUtilsTests.java

Code metrics

Classes

Class AlignmentUtilsTests

Contributing tests

Contributing tests

Source view