File AlignmentUtils.java

Branches:

468

Statements:

934

Methods:

Classes:

LOC:

3,266

NCLOC:

1,992

Total complexity:

358

Complexity density:

0.38

Statements/Method:

15.57

Methods/Class:

Average method complexity:

5.97

Classes

Class	Line #	Total Statements	Complexity	TOTAL Coverage	Actions
AlignmentUtils	86	928	352	0.8162983781.6%
AlignmentUtils.DnaVariant	102	6	6	0.00%

Class AlignmentUtils

Class AlignmentUtils	Line # 86	Total Statements 928	Complexity 352	TOTAL Coverage 0.8162983781.6%
expandContext(AlignmentI,int) : AlignmentI expandContext(AlignmentI,int) : AlignmentI	143143	46.046	10.010	0.983871 0.98387198.4%
getSequenceIndex(AlignmentI,SequenceI) : int getSequenceIndex(AlignmentI,SequenceI) : int	249249	8.08	2.02	1.0 1.0100%
getSequencesByName(AlignmentI) : Map<String, List<SequenceI>> getSequencesByName(AlignmentI) : Map<String, List<SequenceI>>	272272	10.010	3.03	0.9285714 0.928571492.9%
mapProteinAlignmentToCdna(AlignmentI,AlignmentI) : boolean mapProteinAlignmentToCdna(AlignmentI,AlignmentI) : boolean	303303	7.07	3.03	0.7777778 0.777777877.8%
mapProteinToCdna(AlignmentI,AlignmentI,Set<SequenceI>,Set<SequenceI>,boolean) : boolean mapProteinToCdna(AlignmentI,AlignmentI,Set<SequenceI>,Set<SequenceI>,boolean) : boolean	345345	22.022	9.09	0.9375 0.937593.8%
mappingExists(List<AlignedCodonFrame>,SequenceI,SequenceI) : boolean mappingExists(List<AlignedCodonFrame>,SequenceI,SequenceI) : boolean	412412	5.05	3.03	0.6666667 0.666666766.7%
mapCdnaToProtein(SequenceI,SequenceI) : MapList mapCdnaToProtein(SequenceI,SequenceI) : MapList	445445	28.028	12.012	0.95238096 0.9523809695.2%
translatesAs(char[],int,char[]) : boolean translatesAs(char[],int,char[]) : boolean	535535	21.021	14.014	1.0 1.0100%
alignSequenceAs(SequenceI,AlignmentI,String,boolean,boolean) : boolean alignSequenceAs(SequenceI,AlignmentI,String,boolean,boolean) : boolean	612612	14.014	5.05	0.9 0.990%
alignSequenceAs(SequenceI,SequenceI,AlignedCodonFrame,String,char,boolean,boolean) : void alignSequenceAs(SequenceI,SequenceI,AlignedCodonFrame,String,char,boolean,boolean) : void	668668	60.060	20.020	1.0 1.0100%
calculateGapsToInsert(boolean,boolean,int,boolean,int,int,boolean) : int calculateGapsToInsert(boolean,boolean,int,boolean,int,int,boolean) : int	833833	15.015	10.010	1.0 1.0100%
alignProteinAsDna(AlignmentI,AlignmentI) : int alignProteinAsDna(AlignmentI,AlignmentI) : int	897897	6.06	3.03	0.625 0.62562.5%
alignCdsAsProtein(AlignmentI,AlignmentI) : int alignCdsAsProtein(AlignmentI,AlignmentI) : int	923923	18.018	5.05	0.8333333 0.833333383.3%
alignCdsSequenceAsProtein(SequenceI,AlignmentI,List<AlignedCodonFrame>,char) : boolean alignCdsSequenceAsProtein(SequenceI,AlignmentI,List<AlignedCodonFrame>,char) : boolean	969969	48.048	16.016	0.67105263 0.6710526367.1%
buildCodonColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : Map<AlignedCodon, Map<SequenceI, AlignedCodon>> buildCodonColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : Map<AlignedCodon, Map<SequenceI, AlignedCodon>>	11001100	13.013	2.02	1.0 1.0100%
addUnmappedPeptideStarts(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,int) : void addUnmappedPeptideStarts(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,int) : void	11561156	23.023	6.06	0.93939394 0.9393939493.9%
alignProteinAs(AlignmentI,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,List<SequenceI>) : int alignProteinAs(AlignmentI,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,List<SequenceI>) : int	12391239	17.017	2.02	1.0 1.0100%
addCodonPositions(SequenceI,SequenceI,char,Mapping,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>) : void addCodonPositions(SequenceI,SequenceI,char,Mapping,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>) : void	13041304	5.05	4.04	1.0 1.0100%
addCodonToMap(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,AlignedCodon,SequenceI) : void addCodonToMap(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,AlignedCodon,SequenceI) : void	13371337	5.05	2.02	1.0 1.0100%
isMappable(AlignmentI,AlignmentI) : boolean isMappable(AlignmentI,AlignmentI) : boolean	13671367	12.012	7.07	1.0 1.0100%
isMappable(SequenceI,SequenceI,List<AlignedCodonFrame>) : boolean isMappable(SequenceI,SequenceI,List<AlignedCodonFrame>) : boolean	14061406	8.08	6.06	0.625 0.62562.5%
findAddableReferenceAnnotations(List<SequenceI>,Map<String, String>,Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI) : void findAddableReferenceAnnotations(List<SequenceI>,Map<String, String>,Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI) : void	14531453	24.024	9.09	0.95 0.9595%
addReferenceAnnotations(Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI,SequenceGroup) : void addReferenceAnnotations(Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI,SequenceGroup) : void	15361536	3.03	1.01	1.0 1.0100%
isSSAnnotationPresent(Map<SequenceI, List<AlignmentAnnotation>>) : boolean isSSAnnotationPresent(Map<SequenceI, List<AlignmentAnnotation>>) : boolean	15491549	4.04	2.02	0.0 0.00%
addReferenceAnnotationTo(AlignmentI,SequenceI,AlignmentAnnotation,SequenceGroup) : AlignmentAnnotation addReferenceAnnotationTo(AlignmentI,SequenceI,AlignmentAnnotation,SequenceGroup) : AlignmentAnnotation	15781578	16.016	4.04	0.95454544 0.9545454495.5%
showOrHideSequenceAnnotations(AlignmentI,Collection<String>,List<SequenceI>,boolean,boolean) : void showOrHideSequenceAnnotations(AlignmentI,Collection<String>,List<SequenceI>,boolean,boolean) : void	16331633	6.06	7.07	0.9166667 0.916666791.7%
showOrHideAutoCalculatedAnnotationsForGroup(AlignmentI,String,SequenceGroup,boolean,boolean) : void showOrHideAutoCalculatedAnnotationsForGroup(AlignmentI,String,SequenceGroup,boolean,boolean) : void	16701670	6.06	7.07	0.0 0.00%
getFirstSequenceAnnotationOfType(AlignmentI,int) : AlignmentAnnotation getFirstSequenceAnnotationOfType(AlignmentI,int) : AlignmentAnnotation	16981698	6.06	4.04	0.0 0.00%
haveCrossRef(SequenceI,SequenceI) : boolean haveCrossRef(SequenceI,SequenceI) : boolean	17201720	1.01	1.01	1.0 1.0100%
hasCrossRef(SequenceI,SequenceI) : boolean hasCrossRef(SequenceI,SequenceI) : boolean	17351735	11.011	6.06	1.0 1.0100%
makeCdsAlignment(SequenceI[],AlignmentI,SequenceI[]) : AlignmentI makeCdsAlignment(SequenceI[],AlignmentI,SequenceI[]) : AlignmentI	17761776	66.066	16.016	0.8913044 0.891304489.1%
transferGeneLoci(SequenceI,MapList,SequenceI) : void transferGeneLoci(SequenceI,MapList,SequenceI) : void	20032003	8.08	4.04	0.9285714 0.928571492.9%
findCdsForProtein(List<AlignedCodonFrame>,SequenceI,List<AlignedCodonFrame>,Mapping) : SequenceI findCdsForProtein(List<AlignedCodonFrame>,SequenceI,List<AlignedCodonFrame>,Mapping) : SequenceI	20412041	19.019	11.011	0.9354839 0.935483993.5%
makeCdsSequence(SequenceI,Mapping,AlignmentI) : SequenceI makeCdsSequence(SequenceI,Mapping,AlignmentI) : SequenceI	21282128	32.032	10.010	0.62 0.6262%
propagateDBRefsToCDS(SequenceI,SequenceI,SequenceI,Mapping) : List<DBRefEntry> propagateDBRefsToCDS(SequenceI,SequenceI,SequenceI,Mapping) : List<DBRefEntry>	22212221	28.028	11.011	0.8863636 0.886363688.6%
transferFeatures(SequenceI,SequenceI,MapList,String,String) : int transferFeatures(SequenceI,SequenceI,MapList,String,String) : int	23092309	33.033	12.012	0.8867925 0.886792588.7%
mapCdsToProtein(SequenceI,SequenceI) : MapList mapCdsToProtein(SequenceI,SequenceI) : MapList	24052405	22.022	5.05	1.0 1.0100%
findCdsPositions(SequenceI) : List<int[]> findCdsPositions(SequenceI) : List<int[]>	24682468	20.020	7.07	0.9285714 0.928571492.9%
makeCopyAlignment(SequenceI[],SequenceI[],AlignmentI) : AlignmentI makeCopyAlignment(SequenceI[],SequenceI[],AlignmentI) : AlignmentI	25362536	21.021	9.09	0.0 0.00%
alignAs(AlignmentI,AlignmentI) : int alignAs(AlignmentI,AlignmentI) : int	25912591	26.026	5.05	0.88235295 0.8823529588.2%
alignAsSameSequences(AlignmentI,AlignmentI) : boolean alignAsSameSequences(AlignmentI,AlignmentI) : boolean	26702670	33.033	7.07	0.88372093 0.8837209388.4%
buildMappedColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : SortedMap<Integer, Map<SequenceI, Character>> buildMappedColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : SortedMap<Integer, Map<SequenceI, Character>>	27642764	11.011	3.03	0.93333334 0.9333333493.3%
addMappedPositions(SequenceI,SequenceI,Mapping,Map<Integer, Map<SequenceI, Character>>) : boolean addMappedPositions(SequenceI,SequenceI,Mapping,Map<Integer, Map<SequenceI, Character>>) : boolean	28182818	24.024	11.011	0.8 0.880%
looksLikeEnsembl(AlignmentI) : boolean looksLikeEnsembl(AlignmentI) : boolean	28932893	5.05	3.03	0.71428573 0.7142857371.4%
isSecondaryStructurePresent(AlignmentAnnotation[]) : boolean isSecondaryStructurePresent(AlignmentAnnotation[]) : boolean	29062906	8.08	3.03	0.8333333 0.833333383.3%
getSecondaryStructureAnnotationColour(char) : Color getSecondaryStructureAnnotationColour(char) : Color	29292929	7.07	4.04	0.0 0.00%
findSSAnnotationForGivenSeqposition(AlignmentAnnotation,int) : char findSSAnnotationForGivenSeqposition(AlignmentAnnotation,int) : char	29482948	9.09	5.05	0.93333334 0.9333333493.3%
extractSSSourceInAlignmentAnnotation(AlignmentAnnotation[]) : List<String> extractSSSourceInAlignmentAnnotation(AlignmentAnnotation[]) : List<String>	29752975	11.011	4.04	1.0 1.0100%
extractSSSourceFromAnnotationDescription(AlignmentAnnotation) : String extractSSSourceFromAnnotationDescription(AlignmentAnnotation) : String	30063006	26.026	15.015	0.76086956 0.7608695676.1%
getAlignmentAnnotationForSource(SequenceI,String) : List<AlignmentAnnotation> getAlignmentAnnotationForSource(SequenceI,String) : List<AlignmentAnnotation>	31083108	14.014	6.06	0.95454544 0.9545454495.5%
getSequenceAssociatedAlignmentAnnotations(AlignmentAnnotation[],String) : Map<SequenceI, ArrayList<AlignmentAnnotation>> getSequenceAssociatedAlignmentAnnotations(AlignmentAnnotation[],String) : Map<SequenceI, ArrayList<AlignmentAnnotation>>	31473147	9.09	5.05	0.53333336 0.5333333653.3%
isSecondaryStructureFrom(String,AlignmentAnnotation) : boolean isSecondaryStructureFrom(String,AlignmentAnnotation) : boolean	31843184	8.08	5.05	0.0 0.00%
getSecondaryStructureProviderKey(String) : String getSecondaryStructureProviderKey(String) : String	32103210	4.04	2.02	0.0 0.00%
reduceLabelLength(String) : String reduceLabelLength(String) : String	32193219	3.03	1.01	0.0 0.00%
getSecondaryStructureProviderColor(String) : Color getSecondaryStructureProviderColor(String) : Color	32353235	4.04	2.02	0.0 0.00%
assignSecondaryStructureProviderColor(Map<String, Color>,List<String>) : void assignSecondaryStructureProviderColor(Map<String, Color>,List<String>) : void	32453245	9.09	1.01	0.0 0.00%

Class AlignmentUtils.DnaVariant

Class AlignmentUtils.DnaVariant	Line # 102	Total Statements 6	Complexity 6	TOTAL Coverage 0.00%
DnaVariant(String) DnaVariant(String)	108108	2.02	1.01	0.0 0.00%
DnaVariant(String,SequenceFeature) DnaVariant(String,SequenceFeature)	114114	2.02	1.01	0.0 0.00%
getSource() : String getSource() : String	120120	1.01	2.02	0.0 0.00%
toString() : String toString() : String	128128	1.01	2.02	0.0 0.00%

Contributing tests

This file is covered by 284 tests. .

Contributing tests

Test contribution	Test	Result
0.29343367	jalview.io.CrossRef2xmlTests.openCrossrefsForEnsemblTwicejalview.io.CrossRef2xmlTests.openCrossrefsForEnsemblTwice	1PASS
0.125855	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignmentjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment	1PASS
0.10191519	jalview.bin.CommandsTest.structureImageOutputTestjalview.bin.CommandsTest.structureImageOutputTest	1PASS
0.10191519	jalview.bin.CommandsTest.structureImageOutputTestjalview.bin.CommandsTest.structureImageOutputTest	1PASS
0.100547194	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withXrefs	1PASS
0.100547194	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withStartAndStopCodonsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withStartAndStopCodons	1PASS
0.098495215	jalview.bin.CommandsTest2.structureOpeningArgsTestjalview.bin.CommandsTest2.structureOpeningArgsTest	1PASS
0.09712722	jalview.bin.CommandsTest2.structureOpeningArgsTestjalview.bin.CommandsTest2.structureOpeningArgsTest	1PASS
0.09712722	jalview.bin.CommandsTest2.structureOpeningArgsTestjalview.bin.CommandsTest2.structureOpeningArgsTest	1PASS
0.09370725	jalview.bin.CommandsTest.structureImageAnnotationsOutputTestjalview.bin.CommandsTest.structureImageAnnotationsOutputTest	1PASS
0.09370725	jalview.bin.CommandsTest2.structureOpeningArgsTestjalview.bin.CommandsTest2.structureOpeningArgsTest	1PASS
0.09370725	jalview.bin.CommandsTest.structureImageAnnotationsOutputTestjalview.bin.CommandsTest.structureImageAnnotationsOutputTest	1PASS
0.09370725	jalview.bin.CommandsTest2.structureOpeningArgsTestjalview.bin.CommandsTest2.structureOpeningArgsTest	1PASS
0.09370725	jalview.bin.CommandsTest.structureImageAnnotationsOutputTestjalview.bin.CommandsTest.structureImageAnnotationsOutputTest	1PASS
0.09370725	jalview.bin.CommandsTest.structureImageAnnotationsOutputTestjalview.bin.CommandsTest.structureImageAnnotationsOutputTest	1PASS
0.09097127	jalview.bin.CommandsTest.structureImageOutputTestjalview.bin.CommandsTest.structureImageOutputTest	1PASS
0.09097127	jalview.bin.CommandsTest.structureImageOutputTestjalview.bin.CommandsTest.structureImageOutputTest	1PASS
0.08960328	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_filterProductsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_filterProducts	1PASS
0.090287276	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_multipleProteinsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_multipleProteins	1PASS
0.0875513	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_noXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_noXrefs	1PASS
0.08549932	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_alternativeTranscriptsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_alternativeTranscripts	1PASS
0.08344733	jalview.io.FeaturesFileTest.simpleGff3FileLoaderjalview.io.FeaturesFileTest.simpleGff3FileLoader	1PASS
0.08002736	jalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTestjalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTest	1PASS
0.08002736	jalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTestjalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTest	1PASS
0.08002736	jalview.bin.CommandsTest.commandsOpenTestjalview.bin.CommandsTest.commandsOpenTest	1PASS
0.08002736	jalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTestjalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTest	1PASS
0.07865937	jalview.bin.CommandsTest.commandsOpenTestjalview.bin.CommandsTest.commandsOpenTest	1PASS
0.074555404	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_prioritiseXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_prioritiseXrefs	1PASS
0.07318741	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withIntronsjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withIntrons	1PASS
0.07045144	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_noIntronsjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_noIntrons	1PASS
0.06634747	jalview.io.FeaturesFileTest.readGff3Filejalview.io.FeaturesFileTest.readGff3File	1PASS
0.06634747	jalview.io.FeaturesFileTest.simpleGff3RelaxedIdMatchingjalview.io.FeaturesFileTest.simpleGff3RelaxedIdMatching	1PASS
0.06634747	jalview.io.FeaturesFileTest.simpleGff3FileClassjalview.io.FeaturesFileTest.simpleGff3FileClass	1PASS
0.064295486	jalview.datamodel.AlignmentTest.testAlignAs_dnaAsDnajalview.datamodel.AlignmentTest.testAlignAs_dnaAsDna	1PASS
0.0629275	jalview.analysis.AlignmentUtilsTests.testAlignAs_alternateTranscriptsUngappedjalview.analysis.AlignmentUtilsTests.testAlignAs_alternateTranscriptsUngapped	1PASS
0.06361149	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_keepIntronGapsOnlyjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_keepIntronGapsOnly	1PASS
0.06361149	jalview.ext.jmol.JmolViewerTest.testAddStrToSingleSeqViewJMoljalview.ext.jmol.JmolViewerTest.testAddStrToSingleSeqViewJMol	1PASS
0.060875513	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withUnmappedProteinjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withUnmappedProtein	1PASS
0.059507523	jalview.project.Jalview2xmlTests.testStoreAndRecoverPDBEntryjalview.project.Jalview2xmlTests.testStoreAndRecoverPDBEntry	1PASS
0.059507523	jalview.ext.jmol.JmolViewerTest.testSingleSeqViewJMoljalview.ext.jmol.JmolViewerTest.testSingleSeqViewJMol	1PASS
0.059507523	jalview.ext.jmol.JmolParserTest.testAlignmentLoaderjalview.ext.jmol.JmolParserTest.testAlignmentLoader	1PASS
0.059507523	jalview.io.AnnotatedPDBFileInputTest.testJalviewProjectRelocationAnnotationjalview.io.AnnotatedPDBFileInputTest.testJalviewProjectRelocationAnnotation	1PASS
0.06019152	jalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna_incompleteStartCodonjalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna_incompleteStartCodon	1PASS
0.05745554	jalview.gui.StructureChooserTest.openStructureFileForSequenceTestjalview.gui.StructureChooserTest.openStructureFileForSequenceTest	1PASS
0.05745554	jalview.gui.StructureChooserTest.openStructureFileForSequenceTestjalview.gui.StructureChooserTest.openStructureFileForSequenceTest	1PASS
0.05745554	jalview.gui.StructureChooserTest.openStructureFileForSequenceTestjalview.gui.StructureChooserTest.openStructureFileForSequenceTest	1PASS
0.05608755	jalview.gui.AnnotationLabelsTest2.testIdWidthChangesjalview.gui.AnnotationLabelsTest2.testIdWidthChanges	1PASS
0.056771547	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withTrailingPeptidejalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withTrailingPeptide	1PASS
0.05745554	jalview.gui.StructureChooserTest.openStructureFileForSequenceTestjalview.gui.StructureChooserTest.openStructureFileForSequenceTest	1PASS
0.05608755	jalview.gui.AnnotationLabelsTest2.testIdWidthNoChangesjalview.gui.AnnotationLabelsTest2.testIdWidthNoChanges	1PASS
0.05608755	jalview.gui.AnnotationLabelsTest2.testIdWidthNoChangesjalview.gui.AnnotationLabelsTest2.testIdWidthNoChanges	1PASS
0.05608755	jalview.gui.AnnotationLabelsTest2.testIdWidthChangesjalview.gui.AnnotationLabelsTest2.testIdWidthChanges	1PASS
0.054719564	jalview.analysis.AlignmentUtilsTests.testAlignProteinAsDnajalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna	1PASS
0.054035567	jalview.datamodel.AlignmentTest.testAlignAs_proteinAsCdnajalview.datamodel.AlignmentTest.testAlignAs_proteinAsCdna	1PASS
0.051983584	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_mappedProteinProteinjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_mappedProteinProtein	1PASS
0.050615594	jalview.analysis.AlignmentUtilsTests.testIsMappablejalview.analysis.AlignmentUtilsTests.testIsMappable	1PASS
0.0499316	jalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProteinjalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProtein	1PASS
0.04856361	jalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProtein_singleSequencejalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProtein_singleSequence	1PASS
0.04856361	jalview.project.Jalview2xmlTests.testRNAStructureRecoveryjalview.project.Jalview2xmlTests.testRNAStructureRecovery	1PASS
0.03898769	jalview.io.AnnotatedPDBFileInputTest.checkPDBSequenceFeaturesjalview.io.AnnotatedPDBFileInputTest.checkPDBSequenceFeatures	1PASS
0.03967168	jalview.analysis.AlignmentUtilsTests.testExpandContextjalview.analysis.AlignmentUtilsTests.testExpandContext	1PASS
0.038303692	jalview.analysis.AlignmentUtilsTests.testMapCdsToProteinjalview.analysis.AlignmentUtilsTests.testMapCdsToProtein	1PASS
0.036935706	jalview.project.Jalview2xmlTests.testPAEsaveRestorejalview.project.Jalview2xmlTests.testPAEsaveRestore	1PASS
0.036935706	jalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withSSUndefinedInEitherOneSeqjalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withSSUndefinedInEitherOneSeq	1PASS
0.0376197	jalview.analysis.AlignmentUtilsTests.testAddReferenceContactMapjalview.analysis.AlignmentUtilsTests.testAddReferenceContactMap	1PASS
0.035567716	jalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withGapjalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withGap	1PASS
0.035567716	jalview.io.AnnotatedPDBFileInputTest.checkNoDuplicatesjalview.io.AnnotatedPDBFileInputTest.checkNoDuplicates	1PASS
0.033515733	jalview.analysis.AlignmentUtilsTests.testAddReferenceAnnotationsjalview.analysis.AlignmentUtilsTests.testAddReferenceAnnotations	1PASS
0.03146375	jalview.project.Jalview2xmlTests.testCopyViewSettingsjalview.project.Jalview2xmlTests.testCopyViewSettings	1PASS
0.032147743	jalview.analysis.AlignmentUtilsTests.testExpandContext_annotationjalview.analysis.AlignmentUtilsTests.testExpandContext_annotation	1PASS
0.03009576	jalview.analysis.AlignmentUtilsTests.testMapCdnaToProtein_forSubsequencejalview.analysis.AlignmentUtilsTests.testMapCdnaToProtein_forSubsequence	1PASS
0.026675787	jalview.analysis.AlignmentUtilsTests.testTransferFeaturesjalview.analysis.AlignmentUtilsTests.testTransferFeatures	1PASS
0.025307797	jalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesMultipleSubSeqjalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesMultipleSubSeq	1PASS
0.025991792	jalview.analysis.AlignmentUtilsTests.testTranslatesAsjalview.analysis.AlignmentUtilsTests.testTranslatesAs	1PASS
0.025991792	jalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesjalview.analysis.AlignmentUtilsTests.testAlignAsSameSequences	1PASS
0.023939809	jalview.io.JSONFileTest.testBioJSONRoundTripWithColourSchemeNonejalview.io.JSONFileTest.testBioJSONRoundTripWithColourSchemeNone	1PASS
0.024623804	jalview.project.Jalview2xmlTests.testStoreAndRecoverColourThresholdsjalview.project.Jalview2xmlTests.testStoreAndRecoverColourThresholds	1PASS
0.024623804	jalview.project.Jalview2xmlTests.testColourByAnnotScoresjalview.project.Jalview2xmlTests.testColourByAnnotScores	1PASS
0.023255814	jalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withSSUndefinedInBothSeqsjalview.analysis.scoremodels.SecondaryStructureDistanceModelTest.testFindDistances_withSSUndefinedInBothSeqs	1PASS
0.023255814	jalview.gui.AlignViewportTest.testGetSelectionAsNewSequences_withContactMatricesjalview.gui.AlignViewportTest.testGetSelectionAsNewSequences_withContactMatrices	1PASS
0.023939809	jalview.project.Jalview2xmlTests.testStoreAndRestoreIDwidthAndAnnotationHeightjalview.project.Jalview2xmlTests.testStoreAndRestoreIDwidthAndAnnotationHeight	1PASS
0.023939809	jalview.project.Jalview2xmlTests.gatherViewsHerejalview.project.Jalview2xmlTests.gatherViewsHere	1PASS
0.023939809	jalview.project.Jalview2xmlTests.testAutoShowOverviewForLegacyProjectsjalview.project.Jalview2xmlTests.testAutoShowOverviewForLegacyProjects	1PASS
0.023939809	jalview.project.Jalview2xmlTests.noDuplicatePdbMappingsMadejalview.project.Jalview2xmlTests.noDuplicatePdbMappingsMade	1PASS
0.023939809	jalview.project.Jalview2xmlTests.testStoreAndRecoverExpandedviewsjalview.project.Jalview2xmlTests.testStoreAndRecoverExpandedviews	1PASS
0.024623804	jalview.gui.AlignFrameTest.testNewView_colourThresholdsjalview.gui.AlignFrameTest.testNewView_colourThresholds	1PASS
0.024623804	jalview.project.Jalview2xmlTests.testTCoffeeScoresjalview.project.Jalview2xmlTests.testTCoffeeScores	1PASS
0.023939809	jalview.project.Jalview2xmlTests.viewRefPdbAnnotationjalview.project.Jalview2xmlTests.viewRefPdbAnnotation	1PASS
0.023939809	jalview.project.Jalview2xmlTests.testStoreAndRecoverReferenceSeqSettingsjalview.project.Jalview2xmlTests.testStoreAndRecoverReferenceSeqSettings	1PASS
0.024623804	jalview.analysis.AlignmentUtilsTests.testTransferFeatures_withOmitjalview.analysis.AlignmentUtilsTests.testTransferFeatures_withOmit	1PASS
0.024623804	jalview.gui.ColourMenuHelperTest.testAddMenuItems_nucleotidejalview.gui.ColourMenuHelperTest.testAddMenuItems_nucleotide	1PASS
0.023255814	jalview.project.Jalview2xmlTests.testStoreAndRecoverAnnotationRowElementColoursjalview.project.Jalview2xmlTests.testStoreAndRecoverAnnotationRowElementColours	1PASS
0.021887826	jalview.analysis.AlignmentUtilsTests.testAddMappedPositions_withStopCodonjalview.analysis.AlignmentUtilsTests.testAddMappedPositions_withStopCodon	1PASS
0.02120383	jalview.bin.CommandsTest.commandsOpenTestjalview.bin.CommandsTest.commandsOpenTest	1PASS
0.021887826	jalview.analysis.AlignmentUtilsTests.testAddMappedPositionsjalview.analysis.AlignmentUtilsTests.testAddMappedPositions	1PASS
0.01983584	jalview.analysis.AlignmentUtilsTests.testFindCdsForProtein_noUTRjalview.analysis.AlignmentUtilsTests.testFindCdsForProtein_noUTR	1PASS
0.020519836	jalview.analysis.AlignmentUtilsTests.testTransferFeatures_withSelectjalview.analysis.AlignmentUtilsTests.testTransferFeatures_withSelect	1PASS
0.02120383	jalview.bin.CommandsTest.commandsOpenTestjalview.bin.CommandsTest.commandsOpenTest	1PASS
0.02120383	jalview.bin.CommandsTest.commandsOpenTestjalview.bin.CommandsTest.commandsOpenTest	1PASS
0.018467853	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_alreadyAddedjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_alreadyAdded	1PASS
0.019151846	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenujalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu	1PASS
0.019151846	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_notOnAlignmentjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_notOnAlignment	1PASS
0.018467853	jalview.io.AnnotationFileIOTest.exampleAnnotationFileIOjalview.io.AnnotationFileIOTest.exampleAnnotationFileIO	1PASS
0.017099863	jalview.gui.PairwiseAlignmentPanelTest.testConstructor_noSelectionGroupjalview.gui.PairwiseAlignmentPanelTest.testConstructor_noSelectionGroup	1PASS
0.017099863	jalview.io.AnnotationExporterTest.testAnnotationExportAsCSVjalview.io.AnnotationExporterTest.testAnnotationExportAsCSV	1PASS
0.017099863	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.017099863	jalview.analysis.scoremodels.FeatureDistanceModelTest.testFeatureScoreModel_HiddenColumnsjalview.analysis.scoremodels.FeatureDistanceModelTest.testFeatureScoreModel_HiddenColumns	1PASS
0.017099863	jalview.gui.AlignViewportTest.testSetGetHasSearchResultsjalview.gui.AlignViewportTest.testSetGetHasSearchResults	1PASS
0.017099863	jalview.schemes.ColourSchemesTest.testGetColourSchemejalview.schemes.ColourSchemesTest.testGetColourScheme	1PASS
0.017099863	jalview.gui.PaintRefresherTest.testGetAssociatedPanelsjalview.gui.PaintRefresherTest.testGetAssociatedPanels	1PASS
0.017099863	jalview.io.JSONFileTest.testGrpParsed_colourNonejalview.io.JSONFileTest.testGrpParsed_colourNone	1PASS
0.017099863	jalview.structures.models.AAStructureBindingModelTest.testBuildColoursMapjalview.structures.models.AAStructureBindingModelTest.testBuildColoursMap	1PASS
0.017099863	jalview.io.vcf.VCFLoaderTest.testDoLoad_reverseStrandjalview.io.vcf.VCFLoaderTest.testDoLoad_reverseStrand	1PASS
0.017099863	jalview.io.FeaturesFileTest.testPrintJalviewFormat_withFiltersjalview.io.FeaturesFileTest.testPrintJalviewFormat_withFilters	1PASS
0.017099863	jalview.project.Jalview2xmlTests.testStoreAndRecoverGeneLocusjalview.project.Jalview2xmlTests.testStoreAndRecoverGeneLocus	1PASS
0.017099863	jalview.bin.CommandsTest.commandsOpenTestjalview.bin.CommandsTest.commandsOpenTest	1PASS
0.017099863	jalview.schemes.ClustalxColourSchemeTest.testFindColourjalview.schemes.ClustalxColourSchemeTest.testFindColour	1PASS
0.017099863	jalview.project.Jalview2xmlTests.testMergeDatasetsforViewsjalview.project.Jalview2xmlTests.testMergeDatasetsforViews	1PASS
0.017099863	jalview.bin.CommandsTest.commandsOpenTestjalview.bin.CommandsTest.commandsOpenTest	1PASS
0.017099863	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.017099863	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.017099863	jalview.io.gff.ExonerateHelperTest.testAddExonerateGffToAlignmentjalview.io.gff.ExonerateHelperTest.testAddExonerateGffToAlignment	1PASS
0.017099863	jalview.controller.AlignViewControllerTest.testSelectColumnsWithHighlightjalview.controller.AlignViewControllerTest.testSelectColumnsWithHighlight	1PASS
0.017099863	jalview.schemes.ColourSchemesTest.testRegisterColourSchemejalview.schemes.ColourSchemesTest.testRegisterColourScheme	1PASS
0.017099863	jalview.io.BackupFilesTest.backupsEnabledNoRollMaxTestjalview.io.BackupFilesTest.backupsEnabledNoRollMaxTest	1PASS
0.017099863	jalview.gui.SeqPanelTest.testFindMousePosition_wrapped_scales_longSequencejalview.gui.SeqPanelTest.testFindMousePosition_wrapped_scales_longSequence	1PASS
0.017099863	jalview.gui.QuitHandlerTest.testUnsavedChangesjalview.gui.QuitHandlerTest.testUnsavedChanges	1PASS
0.017099863	jalview.renderer.seqfeatures.FeatureRendererTest.testFindFeaturesAtColumnjalview.renderer.seqfeatures.FeatureRendererTest.testFindFeaturesAtColumn	1PASS
0.017099863	jalview.gui.AlignViewportTest.testDeregisterMapping_onCloseViewjalview.gui.AlignViewportTest.testDeregisterMapping_onCloseView	1PASS
0.017099863	jalview.project.Jalview2xmlTests.testStoreAndRecoverGroupRepSeqsjalview.project.Jalview2xmlTests.testStoreAndRecoverGroupRepSeqs	1PASS
0.017099863	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.017099863	jalview.io.AnnotationFileIOTest.testAnnotateAlignmentViewjalview.io.AnnotationFileIOTest.testAnnotateAlignmentView	1PASS
0.017099863	jalview.gui.AlignViewportTest.testSetGlobalColourSchemejalview.gui.AlignViewportTest.testSetGlobalColourScheme	1PASS
0.017099863	jalview.controller.AlignViewControllerTest.testFindColumnsWithFeaturejalview.controller.AlignViewControllerTest.testFindColumnsWithFeature	1PASS
0.017099863	jalview.gui.SeqPanelTest.testSetStatusReturnsNearestResiduePositionjalview.gui.SeqPanelTest.testSetStatusReturnsNearestResiduePosition	1PASS
0.017099863	jalview.gui.ScalePanelTest.testPreventNegativeStartColumnjalview.gui.ScalePanelTest.testPreventNegativeStartColumn	1PASS
0.017099863	jalview.gui.AlignViewportTest.testGetConsensusSeqjalview.gui.AlignViewportTest.testGetConsensusSeq	1PASS
0.017099863	jalview.gui.QuitHandlerTest.testSavedAlignmentChangesjalview.gui.QuitHandlerTest.testSavedAlignmentChanges	1PASS
0.017099863	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.017099863	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.017099863	jalview.io.FeaturesFileTest.testParse_pureGff3jalview.io.FeaturesFileTest.testParse_pureGff3	1PASS
0.017099863	jalview.bin.CommandsTest2.structureOpeningArgsTestjalview.bin.CommandsTest2.structureOpeningArgsTest	1PASS
0.017099863	jalview.project.Jalview2xmlTests.testSaveLoadFeatureColoursAndFiltersjalview.project.Jalview2xmlTests.testSaveLoadFeatureColoursAndFilters	1PASS
0.017099863	jalview.gui.SeqPanelTest.testFindMousePosition_wrapped_scaleAbovejalview.gui.SeqPanelTest.testFindMousePosition_wrapped_scaleAbove	1PASS
0.017099863	jalview.gui.DesktopTests.testInternalCopyPastejalview.gui.DesktopTests.testInternalCopyPaste	1PASS
0.017099863	jalview.bin.CommandsTest2.structureOpeningArgsTestjalview.bin.CommandsTest2.structureOpeningArgsTest	1PASS
0.017099863	jalview.gui.QuitHandlerTest.testNoGUIUnsavedChangesjalview.gui.QuitHandlerTest.testNoGUIUnsavedChanges	1PASS
0.017099863	jalview.gui.FeatureSettingsTest.testSaveLoadjalview.gui.FeatureSettingsTest.testSaveLoad	1PASS
0.017099863	jalview.gui.AlignViewportTest.testShowOrDontShowOccupancyjalview.gui.AlignViewportTest.testShowOrDontShowOccupancy	1PASS
0.017099863	jalview.gui.AlignFrameTest.testNewView_dsRefPreservedjalview.gui.AlignFrameTest.testNewView_dsRefPreserved	1PASS
0.017099863	jalview.gui.PairwiseAlignmentPanelTest.testConstructor_withSelectionGroupjalview.gui.PairwiseAlignmentPanelTest.testConstructor_withSelectionGroup	1PASS
0.017099863	jalview.io.vcf.VCFLoaderTest.testDoLoad_vepCsqjalview.io.vcf.VCFLoaderTest.testDoLoad_vepCsq	1PASS
0.017099863	jalview.gui.StructureChooserTest.openStructureFileForSequenceTestjalview.gui.StructureChooserTest.openStructureFileForSequenceTest	1PASS
0.017099863	jalview.analysis.scoremodels.FeatureDistanceModelTest.testFindFeatureAt_PointFeaturejalview.analysis.scoremodels.FeatureDistanceModelTest.testFindFeatureAt_PointFeature	1PASS
0.017099863	jalview.gui.SeqPanelTest.testFindColumn_and_FindAlignmentColumn_wrappedjalview.gui.SeqPanelTest.testFindColumn_and_FindAlignmentColumn_wrapped	1PASS
0.017099863	jalview.gui.StructureChooserTest.openStructureFileForSequenceTestjalview.gui.StructureChooserTest.openStructureFileForSequenceTest	1PASS
0.017099863	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.017099863	jalview.analysis.AverageDistanceEngineTest.testUPGMAEnginejalview.analysis.AverageDistanceEngineTest.testUPGMAEngine	1PASS
0.017099863	jalview.gui.AlignmentPanelTest.testSetOverviewTitlejalview.gui.AlignmentPanelTest.testSetOverviewTitle	1PASS
0.017099863	jalview.io.FeaturesFileTest.testParsejalview.io.FeaturesFileTest.testParse	1PASS
0.017099863	jalview.bin.CommandsTest.commandsOpenTestjalview.bin.CommandsTest.commandsOpenTest	1PASS
0.017099863	jalview.io.BackupFilesTest.backupsEnabledRollMaxTestjalview.io.BackupFilesTest.backupsEnabledRollMaxTest	1PASS
0.017099863	jalview.analysis.scoremodels.FeatureDistanceModelTest.testFeatureScoreModeljalview.analysis.scoremodels.FeatureDistanceModelTest.testFeatureScoreModel	1PASS
0.017099863	jalview.renderer.ScaleRendererTest.testCalculateMarksjalview.renderer.ScaleRendererTest.testCalculateMarks	1PASS
0.017099863	jalview.gui.QuitHandlerTest.testInstantQuitjalview.gui.QuitHandlerTest.testInstantQuit	1PASS
0.017099863	jalview.gui.QuitHandlerTest.testForceQuitjalview.gui.QuitHandlerTest.testForceQuit	1PASS
0.017099863	jalview.renderer.seqfeatures.FeatureRendererTest.testFilterFeaturesForDisplayjalview.renderer.seqfeatures.FeatureRendererTest.testFilterFeaturesForDisplay	1PASS
0.017099863	jalview.analysis.scoremodels.FeatureDistanceModelTest.testFindDistancesjalview.analysis.scoremodels.FeatureDistanceModelTest.testFindDistances	1PASS
0.017099863	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.017099863	jalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTestjalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTest	1PASS
0.017099863	jalview.schemes.ClustalxColourSchemeTest.testFindColour_ignoreGapsjalview.schemes.ClustalxColourSchemeTest.testFindColour_ignoreGaps	1PASS
0.017099863	jalview.project.Jalview2xmlTests.testStoreAndRecoverNoOverviewjalview.project.Jalview2xmlTests.testStoreAndRecoverNoOverview	1PASS
0.017099863	jalview.gui.SeqPanelTest.testFindMousePosition_wrapped_noAnnotationsjalview.gui.SeqPanelTest.testFindMousePosition_wrapped_noAnnotations	1PASS
0.017099863	jalview.gui.ScalePanelTest.testBuildPopupMenujalview.gui.ScalePanelTest.testBuildPopupMenu	1PASS
0.017099863	jalview.io.FeaturesFileTest.testParse_mixedJalviewGffjalview.io.FeaturesFileTest.testParse_mixedJalviewGff	1PASS
0.017099863	jalview.renderer.seqfeatures.FeatureRendererTest.testGetColourjalview.renderer.seqfeatures.FeatureRendererTest.testGetColour	1PASS
0.017099863	jalview.gui.AlignViewportTest.testSetSelectionGroupjalview.gui.AlignViewportTest.testSetSelectionGroup	1PASS
0.017099863	jalview.bin.CommandsTest.structureImageAnnotationsOutputTestjalview.bin.CommandsTest.structureImageAnnotationsOutputTest	1PASS
0.017099863	jalview.gui.QuitHandlerTest.testWaitForSaveQuitjalview.gui.QuitHandlerTest.testWaitForSaveQuit	1PASS
0.017099863	jalview.bin.CommandsTest.structureImageAnnotationsOutputTestjalview.bin.CommandsTest.structureImageAnnotationsOutputTest	1PASS
0.017099863	jalview.io.JalviewExportPropertiesTests.testImportExportPeriodGapsjalview.io.JalviewExportPropertiesTests.testImportExportPeriodGaps	1PASS
0.017099863	jalview.project.Jalview2xmlTests.testMergeDatasetsforManyViewsjalview.project.Jalview2xmlTests.testMergeDatasetsforManyViews	1PASS
0.017099863	jalview.gui.QuitHandlerTest.testSavedProjectChangesjalview.gui.QuitHandlerTest.testSavedProjectChanges	1PASS
0.017099863	jalview.gui.AlignmentPanelTest.testSetOverviewTitle_automaticOverviewjalview.gui.AlignmentPanelTest.testSetOverviewTitle_automaticOverview	1PASS
0.017099863	jalview.renderer.seqfeatures.FeatureRendererTest.testFindAllFeaturesjalview.renderer.seqfeatures.FeatureRendererTest.testFindAllFeatures	1PASS
0.017099863	jalview.bin.CommandsTest.commandsOpenTestjalview.bin.CommandsTest.commandsOpenTest	1PASS
0.017099863	jalview.io.BackupFilesTest.backupsEnabledSingleFileBackupTestjalview.io.BackupFilesTest.backupsEnabledSingleFileBackupTest	1PASS
0.017099863	jalview.analysis.scoremodels.FeatureDistanceModelTest.testFeatureScoreModel_hiddenFirstColumnjalview.analysis.scoremodels.FeatureDistanceModelTest.testFeatureScoreModel_hiddenFirstColumn	1PASS
0.017099863	jalview.bin.CommandsTest.commandsOpenTestjalview.bin.CommandsTest.commandsOpenTest	1PASS
0.017099863	jalview.io.FeaturesFileTest.testPrintGffFormat_withFiltersjalview.io.FeaturesFileTest.testPrintGffFormat_withFilters	1PASS
0.017099863	jalview.gui.AlignFrameTest.testHideFeatureColumnsjalview.gui.AlignFrameTest.testHideFeatureColumns	1PASS
0.017099863	jalview.gui.SeqPanelTest.testFindMousePosition_wrapped_annotationsjalview.gui.SeqPanelTest.testFindMousePosition_wrapped_annotations	1PASS
0.017099863	jalview.io.FeaturesFileTest.testPrintGffFormatjalview.io.FeaturesFileTest.testPrintGffFormat	1PASS
0.017099863	jalview.schemes.PIDColourSchemeTest.testFindColour_ignoreGapsjalview.schemes.PIDColourSchemeTest.testFindColour_ignoreGaps	1PASS
0.017099863	jalview.gui.SeqPanelTest.testAmbiguousAminoAcidGetsStatusMessagejalview.gui.SeqPanelTest.testAmbiguousAminoAcidGetsStatusMessage	1PASS
0.017099863	jalview.io.vcf.VCFLoaderTest.testDoLoadjalview.io.vcf.VCFLoaderTest.testDoLoad	1PASS
0.017099863	jalview.renderer.seqfeatures.FeatureRendererTest.testFindComplementFeaturesAtResiduejalview.renderer.seqfeatures.FeatureRendererTest.testFindComplementFeaturesAtResidue	1PASS
0.017099863	jalview.bin.CommandsTest.commandsOpenTestjalview.bin.CommandsTest.commandsOpenTest	1PASS
0.017099863	jalview.gui.ScalePanelTest.testSelectColumns_withHiddenjalview.gui.ScalePanelTest.testSelectColumns_withHidden	1PASS
0.017099863	jalview.analysis.scoremodels.FeatureDistanceModelTest.testFindDistances_withParamsjalview.analysis.scoremodels.FeatureDistanceModelTest.testFindDistances_withParams	1PASS
0.017099863	jalview.bin.CommandsTest2.structureOpeningArgsTestjalview.bin.CommandsTest2.structureOpeningArgsTest	1PASS
0.017099863	jalview.io.FeaturesFileTest.testPrintJalviewFormatjalview.io.FeaturesFileTest.testPrintJalviewFormat	1PASS
0.017099863	jalview.ext.jmol.JmolCommandsTest.testGetColourBySequenceCommands_hiddenColumnsjalview.ext.jmol.JmolCommandsTest.testGetColourBySequenceCommands_hiddenColumns	1PASS
0.017099863	jalview.gui.AlignViewportTest.testUpdateConservation_qualityOnlyjalview.gui.AlignViewportTest.testUpdateConservation_qualityOnly	1PASS
0.017099863	jalview.project.Jalview2xmlTests.testStoreAndRecoverOverviewjalview.project.Jalview2xmlTests.testStoreAndRecoverOverview	1PASS
0.017099863	jalview.io.WindowsFileLoadAndSaveTest.loadAndSaveAlignmentjalview.io.WindowsFileLoadAndSaveTest.loadAndSaveAlignment	1PASS
0.017099863	jalview.bin.CommandsTest.commandsOpenTestjalview.bin.CommandsTest.commandsOpenTest	1PASS
0.017099863	jalview.datamodel.AlignmentViewTest.testGetVisibleContigsjalview.datamodel.AlignmentViewTest.testGetVisibleContigs	1PASS
0.017099863	jalview.analysis.AlignmentUtilsTests.testFindCdsPositions_fivePrimeIncompletejalview.analysis.AlignmentUtilsTests.testFindCdsPositions_fivePrimeIncomplete	1PASS
0.017099863	jalview.project.Jalview2xmlTests.testPcaViewAssociationjalview.project.Jalview2xmlTests.testPcaViewAssociation	1PASS
0.017099863	jalview.io.gff.GffTests.testResolveExonerateGffjalview.io.gff.GffTests.testResolveExonerateGff	1PASS
0.017099863	jalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTestjalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTest	1PASS
0.017099863	jalview.io.BackupFilesTest.backupsEnabledReverseRollMaxTestjalview.io.BackupFilesTest.backupsEnabledReverseRollMaxTest	1PASS
0.017099863	jalview.renderer.seqfeatures.FeatureRendererTest.testIsVisiblejalview.renderer.seqfeatures.FeatureRendererTest.testIsVisible	1PASS
0.017099863	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.017099863	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.017099863	jalview.bin.CommandsTest.structureImageAnnotationsOutputTestjalview.bin.CommandsTest.structureImageAnnotationsOutputTest	1PASS
0.017099863	jalview.io.BackupFilesTest.noBackupsEnabledTestjalview.io.BackupFilesTest.noBackupsEnabledTest	1PASS
0.017099863	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.017099863	jalview.gui.AlignViewportTest.testDeregisterMapping_withNoReferencejalview.gui.AlignViewportTest.testDeregisterMapping_withNoReference	1PASS
0.017099863	jalview.bin.CommandsTest2.structureOpeningArgsTestjalview.bin.CommandsTest2.structureOpeningArgsTest	1PASS
0.017099863	jalview.gui.AlignViewportTest.testDeregisterMapping_withReferencejalview.gui.AlignViewportTest.testDeregisterMapping_withReference	1PASS
0.017099863	jalview.analysis.AlignmentUtilsTests.testFindCdsForProteinjalview.analysis.AlignmentUtilsTests.testFindCdsForProtein	1PASS
0.017099863	jalview.gui.ColourMenuHelperTest.testAddMenuItems_forAlignFramejalview.gui.ColourMenuHelperTest.testAddMenuItems_forAlignFrame	1PASS
0.017099863	jalview.bin.CommandsTest.structureImageAnnotationsOutputTestjalview.bin.CommandsTest.structureImageAnnotationsOutputTest	1PASS
0.017099863	jalview.bin.CommandsTest2.structureOpeningArgsTestjalview.bin.CommandsTest2.structureOpeningArgsTest	1PASS
0.013679891	jalview.analysis.AlignmentUtilsTests.testHaveCrossRefjalview.analysis.AlignmentUtilsTests.testHaveCrossRef	1PASS
0.012995896	jalview.analysis.AlignmentUtilsTests.testFindCdsPositionsjalview.analysis.AlignmentUtilsTests.testFindCdsPositions	1PASS
0.013679891	jalview.analysis.AlignmentUtilsTests.testHasCrossRefjalview.analysis.AlignmentUtilsTests.testHasCrossRef	1PASS
0.012311902	jalview.datamodel.PAEContactMatrixTest.testSeqAssociatedPAEMatrixjalview.datamodel.PAEContactMatrixTest.testSeqAssociatedPAEMatrix	1PASS
0.010943913	jalview.analysis.DnaTest.testReverseCdnajalview.analysis.DnaTest.testReverseCdna	1PASS
0.010943913	jalview.renderer.ResidueColourFinderTest.testGetResidueColour_zappojalview.renderer.ResidueColourFinderTest.testGetResidueColour_zappo	1PASS
0.010943913	jalview.renderer.ResidueColourFinderTest.testGetResidueColour_userdefjalview.renderer.ResidueColourFinderTest.testGetResidueColour_userdef	1PASS
0.010943913	jalview.renderer.OverviewResColourFinderTest.testGetResidueBoxColour_nonejalview.renderer.OverviewResColourFinderTest.testGetResidueBoxColour_none	1PASS
0.010943913	jalview.analysis.DnaTest.testTranslateCdna_withUntranslatableCodonsAndHiddenColumnsjalview.analysis.DnaTest.testTranslateCdna_withUntranslatableCodonsAndHiddenColumns	1PASS
0.010943913	jalview.datamodel.HiddenSequencesTest.testHideShowSequence_withHiddenRepSequencejalview.datamodel.HiddenSequencesTest.testHideShowSequence_withHiddenRepSequence	1PASS
0.010943913	jalview.renderer.ResidueColourFinderTest.testGetResidueColour_nonejalview.renderer.ResidueColourFinderTest.testGetResidueColour_none	1PASS
0.010943913	jalview.util.MappingUtilsTest.testMapColumnSelection_proteinToDnajalview.util.MappingUtilsTest.testMapColumnSelection_proteinToDna	1PASS
0.010943913	jalview.io.FeaturesFileTest.testParse_jalviewFeaturesOnlyjalview.io.FeaturesFileTest.testParse_jalviewFeaturesOnly	1PASS
0.010943913	jalview.analysis.FinderTest.testFind_inDescriptionjalview.analysis.FinderTest.testFind_inDescription	1PASS
0.010943913	jalview.util.MappingUtilsTest.testMapColumnSelection_dnaToProteinjalview.util.MappingUtilsTest.testMapColumnSelection_dnaToProtein	1PASS
0.010943913	jalview.util.MappingUtilsTest.testMapSequenceGroup_sharedDatasetjalview.util.MappingUtilsTest.testMapSequenceGroup_sharedDataset	1PASS
0.010943913	jalview.util.MappingUtilsTest.testMapColumnSelection_hiddenColumnsjalview.util.MappingUtilsTest.testMapColumnSelection_hiddenColumns	1PASS
0.010943913	jalview.gui.SeqPanelTest.testFindColumn_unwrappedjalview.gui.SeqPanelTest.testFindColumn_unwrapped	1PASS
0.010943913	jalview.analysis.FinderTest.testFindAll_sequenceIdsjalview.analysis.FinderTest.testFindAll_sequenceIds	1PASS
0.010943913	jalview.renderer.OverviewResColourFinderTest.testGetResidueBoxColour_zappojalview.renderer.OverviewResColourFinderTest.testGetResidueBoxColour_zappo	1PASS
0.010943913	jalview.util.MappingUtilsTest.testMapSequenceGroup_columnsjalview.util.MappingUtilsTest.testMapSequenceGroup_columns	1PASS
0.010943913	jalview.renderer.OverviewResColourFinderTest.testGetResidueBoxColour_userdefjalview.renderer.OverviewResColourFinderTest.testGetResidueBoxColour_userdef	1PASS
0.010943913	jalview.util.MappingUtilsTest.testMapSequenceGroup_sequencesjalview.util.MappingUtilsTest.testMapSequenceGroup_sequences	1PASS
0.010943913	jalview.util.MappingUtilsTest.testMapColumnSelection_nulljalview.util.MappingUtilsTest.testMapColumnSelection_null	1PASS
0.010943913	jalview.util.MappingUtilsTest.testMapSequenceGroup_regionjalview.util.MappingUtilsTest.testMapSequenceGroup_region	1PASS
0.009575923	jalview.analysis.AlignmentUtilsTests.testTransferGeneLocijalview.analysis.AlignmentUtilsTests.testTransferGeneLoci	1PASS
0.009575923	jalview.analysis.AlignmentUtilsTests.testGetSequencesByNamejalview.analysis.AlignmentUtilsTests.testGetSequencesByName	1PASS
0.00752394	jalview.gui.AnnotationChooserTest.testSelectType_showForAlljalview.gui.AnnotationChooserTest.testSelectType_showForAll	1PASS
0.0068399454	jalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSourcesjalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSources	1PASS
0.00752394	jalview.gui.AnnotationChooserTest.testDeselectType_hideForAlljalview.gui.AnnotationChooserTest.testDeselectType_hideForAll	1PASS
0.00752394	jalview.gui.AnnotationChooserTest.testIsInActionScope_selectedScopejalview.gui.AnnotationChooserTest.testIsInActionScope_selectedScope	1PASS
0.00752394	jalview.gui.AnnotationChooserTest.testSelectType_showForSelectedjalview.gui.AnnotationChooserTest.testSelectType_showForSelected	1PASS
0.00752394	jalview.gui.AnnotationChooserTest.testIsInActionScope_unselectedScopejalview.gui.AnnotationChooserTest.testIsInActionScope_unselectedScope	1PASS
0.00752394	jalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcLastjalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcLast	1PASS
0.0068399454	jalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSourcesjalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSources	1PASS
0.00752394	jalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcFirstjalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcFirst	1PASS
0.00752394	jalview.gui.AnnotationChooserTest.testDeselectType_showForAlljalview.gui.AnnotationChooserTest.testDeselectType_showForAll	1PASS
0.00752394	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noReferenceAnnotationsjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noReferenceAnnotations	1PASS
0.00752394	jalview.analysis.AnnotationSorterTest.testSort_timingUnsortedjalview.analysis.AnnotationSorterTest.testSort_timingUnsorted	1PASS
0.008207934	jalview.analysis.AlignmentUtilsTests.testShowOrHideSequenceAnnotationsjalview.analysis.AlignmentUtilsTests.testShowOrHideSequenceAnnotations	1PASS
0.00752394	jalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcLastjalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcLast	1PASS
0.00752394	jalview.analysis.AnnotationSorterTest.testSort_timingSemisortedjalview.analysis.AnnotationSorterTest.testSort_timingSemisorted	1PASS
0.00752394	jalview.analysis.AnnotationSorterTest.testNoSort_autocalcFirstjalview.analysis.AnnotationSorterTest.testNoSort_autocalcFirst	1PASS
0.00752394	jalview.gui.AnnotationChooserTest.testDeselectType_showForSelectedjalview.gui.AnnotationChooserTest.testDeselectType_showForSelected	1PASS
0.00752394	jalview.gui.AnnotationColumnChooserTest.testResetjalview.gui.AnnotationColumnChooserTest.testReset	1PASS
0.00752394	jalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSourcesjalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSources	1PASS
0.00752394	jalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcFirstjalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcFirst	1PASS
0.00752394	jalview.gui.AnnotationChooserTest.testSelectType_hideForSelectedjalview.gui.AnnotationChooserTest.testSelectType_hideForSelected	1PASS
0.00752394	jalview.analysis.AnnotationSorterTest.testSort_timingPresortedjalview.analysis.AnnotationSorterTest.testSort_timingPresorted	1PASS
0.00752394	jalview.gui.AnnotationChooserTest.testResetOriginalStatejalview.gui.AnnotationChooserTest.testResetOriginalState	1PASS
0.00752394	jalview.gui.AnnotationChooserTest.testSelectType_hideForAlljalview.gui.AnnotationChooserTest.testSelectType_hideForAll	1PASS
0.00752394	jalview.gui.AlignFrameTest.testChangeColour_background_groupsAndThresholdsjalview.gui.AlignFrameTest.testChangeColour_background_groupsAndThresholds	1PASS
0.00752394	jalview.gui.PopupMenuTest.testHideInsertionsjalview.gui.PopupMenuTest.testHideInsertions	1PASS
0.00752394	jalview.gui.AnnotationChooserTest.testDeselectType_hideForSelectedjalview.gui.AnnotationChooserTest.testDeselectType_hideForSelected	1PASS
0.006155951	jalview.workers.AlignCalcManagerTest.testRemoveWorkerForAnnotationjalview.workers.AlignCalcManagerTest.testRemoveWorkerForAnnotation	1PASS
0.006155951	jalview.analysis.GroupingTest.testMakeGroupsWithBothjalview.analysis.GroupingTest.testMakeGroupsWithBoth	1PASS
0.006155951	jalview.ext.jmol.JmolParserTest.testFileParserjalview.ext.jmol.JmolParserTest.testFileParser	1PASS
0.004103967	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noSequenceSelectedjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noSequenceSelected	1PASS

Source view

/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */

package jalview.analysis;

import java.awt.Color;

import java.util.ArrayList;

import java.util.Arrays;

import java.util.Collection;

import java.util.Collections;

import java.util.HashMap;

import java.util.HashSet;

import java.util.Iterator;

import java.util.LinkedHashMap;

import java.util.List;

import java.util.Locale;

import java.util.Map;

import java.util.Map.Entry;

import java.util.NoSuchElementException;

import java.util.Set;

import java.util.SortedMap;

import java.util.TreeMap;

import java.util.Vector;

import java.util.stream.Collectors;

import org.jcolorbrewer.ColorBrewer;

import jalview.api.AlignCalcWorkerI;

import jalview.bin.Console;

import jalview.commands.RemoveGapColCommand;

import jalview.datamodel.AlignedCodon;

import jalview.datamodel.AlignedCodonFrame;

import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;

import jalview.datamodel.Alignment;

import jalview.datamodel.AlignmentAnnotation;

import jalview.datamodel.AlignmentI;

import jalview.datamodel.Annotation;

import jalview.datamodel.ContactMatrixI;

import jalview.datamodel.DBRefEntry;

import jalview.datamodel.GeneLociI;

import jalview.datamodel.IncompleteCodonException;

import jalview.datamodel.Mapping;

import jalview.datamodel.PDBEntry;

import jalview.datamodel.SeqCigar;

import jalview.datamodel.Sequence;

import jalview.datamodel.SequenceFeature;

import jalview.datamodel.SequenceGroup;

import jalview.datamodel.SequenceI;

import jalview.datamodel.features.SequenceFeatures;

import jalview.gui.AlignmentPanel;

import jalview.io.gff.SequenceOntologyI;

import jalview.schemes.ResidueProperties;

import jalview.util.Comparison;

import jalview.util.Constants;

import jalview.util.DBRefUtils;

import jalview.util.IntRangeComparator;

import jalview.util.MapList;

import jalview.util.MappingUtils;

import jalview.util.MessageManager;

import jalview.workers.SecondaryStructureConsensusThread;

/**
 * Grab bag of useful alignment manipulation operations. Expect these to be
 * refactored elsewhere at some point.
 *
 * @author jimp
 */

public class AlignmentUtils

{

private static final int CODON_LENGTH = 3;

private static final String SEQUENCE_VARIANT = "sequence_variant:";

/*
 * the 'id' attribute is provided for variant features fetched from
 * Ensembl using its REST service with JSON format
 */

public static final String VARIANT_ID = "id";

/**

* A data model to hold the 'normal' base value at a position, and an optional

100

* sequence variant feature

101

102

static final class DnaVariant

{

final String base;

SequenceFeature variant;

107

108

DnaVariant(String nuc)

{

base = nuc;

variant = null;

}

DnaVariant(String nuc, SequenceFeature var)

{

base = nuc;

variant = var;

}

public String getSource()

121

{

122

return variant == null ? null : variant.getFeatureGroup();

}

/**

* toString for aid in the debugger only

127

128

@Override

129

public String toString()

130

{

131

return base + ":" + (variant == null ? "" : variant.getDescription());

}

}

/**

* given an existing alignment, create a new alignment including all, or up to

137

* flankSize additional symbols from each sequence's dataset sequence

* @param core

* @param flankSize

* @return AlignmentI

public static AlignmentI expandContext(AlignmentI core, int flankSize)

144

{

145

List<SequenceI> sq = new ArrayList<>();

146

int maxoffset = 0;

147

for (SequenceI s : core.getSequences())

148

{

149

131

SequenceI newSeq = s.deriveSequence();

150

131

final int newSeqStart = newSeq.getStart() - 1;

151

131

if (newSeqStart > maxoffset

152

&& newSeq.getDatasetSequence().getStart() < s.getStart())

153

{

154

131

maxoffset = newSeqStart;

155

}

156

131

sq.add(newSeq);

}

if (flankSize > -1)

{

maxoffset = Math.min(maxoffset, flankSize);

}

* now add offset left and right to create an expanded alignment

165

166

for (SequenceI s : sq)

167

{

168

131

SequenceI ds = s;

169

262

while (ds.getDatasetSequence() != null)

170

{

171

131

ds = ds.getDatasetSequence();

172

}

173

131

int s_end = s.findPosition(s.getStart() + s.getLength());

174

// find available flanking residues for sequence

175

131

int ustream_ds = s.getStart() - ds.getStart();

176

131

int dstream_ds = ds.getEnd() - s_end;

177

178

// build new flanked sequence

179

180

// compute gap padding to start of flanking sequence

181

131

int offset = maxoffset - ustream_ds;

182

183

// padding is gapChar x ( maxoffset - min(ustream_ds, flank)

184

131

if (flankSize >= 0)

185

{

186

125

if (flankSize < ustream_ds)

187

{

188

// take up to flankSize residues

189

offset = maxoffset - flankSize;

190

ustream_ds = flankSize;

191

}

192

125

if (flankSize <= dstream_ds)

193

{

194

116

dstream_ds = flankSize - 1;

195

}

196

}

197

// TODO use Character.toLowerCase to avoid creating String objects?

198

131

char[] upstream = new String(ds

199

.getSequence(s.getStart() - 1 - ustream_ds, s.getStart() - 1))

200

.toLowerCase(Locale.ROOT).toCharArray();

201

131

char[] downstream = new String(

202

ds.getSequence(s_end - 1, s_end + dstream_ds))

203

.toLowerCase(Locale.ROOT).toCharArray();

204

131

char[] coreseq = s.getSequence();

205

131

char[] nseq = new char[offset + upstream.length + downstream.length

206

+ coreseq.length];

207

131

char c = core.getGapCharacter();

208

209

131

int p = 0;

210

461

for (; p < offset; p++)

211

{

212

330

nseq[p] = c;

213

}

214

215

131

System.arraycopy(upstream, 0, nseq, p, upstream.length);

216

131

System.arraycopy(coreseq, 0, nseq, p + upstream.length,

217

coreseq.length);

218

131

System.arraycopy(downstream, 0, nseq,

219

p + coreseq.length + upstream.length, downstream.length);

220

131

s.setSequence(new String(nseq));

221

131

s.setStart(s.getStart() - ustream_ds);

222

131

s.setEnd(s_end + downstream.length);

223

}

224

AlignmentI newAl = new jalview.datamodel.Alignment(

225

sq.toArray(new SequenceI[0]));

226

for (SequenceI s : sq)

227

{

228

131

if (s.getAnnotation() != null)

229

{

230

for (AlignmentAnnotation aa : s.getAnnotation())

231

{

232

aa.adjustForAlignment(); // JAL-1712 fix

233

newAl.addAnnotation(aa);

}

}

}

newAl.setDataset(core.getDataset());

return newAl;

}

/**

* Returns the index (zero-based position) of a sequence in an alignment, or

* -1 if not found.

* @param al

* @param seq

* @return

59568

public static int getSequenceIndex(AlignmentI al, SequenceI seq)

250

{

251

59568

int result = -1;

252

59568

int pos = 0;

253

59568

for (SequenceI alSeq : al.getSequences())

254

{

255

126272048

if (alSeq == seq)

256

{

257

59510

result = pos;

258

59510

break;

259

}

260

126212538

pos++;

261

}

262

59568

return result;

}

/**

* Returns a map of lists of sequences in the alignment, keyed by sequence

267

* name. For use in mapping between different alignment views of the same

268

* sequences.

269

270

* @see jalview.datamodel.AlignmentI#getSequencesByName()

271

272

public static Map<String, List<SequenceI>> getSequencesByName(

273

AlignmentI al)

274

{

275

Map<String, List<SequenceI>> theMap = new LinkedHashMap<>();

276

for (SequenceI seq : al.getSequences())

277

{

278

String name = seq.getName();

279

if (name != null)

280

{

281

List<SequenceI> seqs = theMap.get(name);

282

if (seqs == null)

283

{

284

seqs = new ArrayList<>();

285

theMap.put(name, seqs);

}

seqs.add(seq);

}

}

return theMap;

}

/**

* Build mapping of protein to cDNA alignment. Mappings are made between

295

* sequences where the cDNA translates to the protein sequence. Any new

296

* mappings are added to the protein alignment. Returns true if any mappings

297

* either already exist or were added, else false.

298

299

* @param proteinAlignment

300

* @param cdnaAlignment

301

* @return

302

303

public static boolean mapProteinAlignmentToCdna(

304

final AlignmentI proteinAlignment, final AlignmentI cdnaAlignment)

305

{

306

if (proteinAlignment == null || cdnaAlignment == null)

{

return false;

}

Set<SequenceI> mappedDna = new HashSet<>();

312

Set<SequenceI> mappedProtein = new HashSet<>();

313

314

315

* First pass - map sequences where cross-references exist. This include

316

* 1-to-many mappings to support, for example, variant cDNA.

317

318

boolean mappingPerformed = mapProteinToCdna(proteinAlignment,

319

cdnaAlignment, mappedDna, mappedProtein, true);

320

321

322

* Second pass - map sequences where no cross-references exist. This only

323

* does 1-to-1 mappings and assumes corresponding sequences are in the same

324

* order in the alignments.

325

326

mappingPerformed |= mapProteinToCdna(proteinAlignment, cdnaAlignment,

327

mappedDna, mappedProtein, false);

328

return mappingPerformed;

}

/**

* Make mappings between compatible sequences (where the cDNA translation

333

* matches the protein).

334

335

* @param proteinAlignment

336

* @param cdnaAlignment

337

* @param mappedDna

338

* a set of mapped DNA sequences (to add to)

339

* @param mappedProtein

340

* a set of mapped Protein sequences (to add to)

341

* @param xrefsOnly

342

* if true, only map sequences where xrefs exist

343

* @return

344

345

protected static boolean mapProteinToCdna(

346

final AlignmentI proteinAlignment, final AlignmentI cdnaAlignment,

347

Set<SequenceI> mappedDna, Set<SequenceI> mappedProtein,

348

boolean xrefsOnly)

349

{

350

boolean mappingExistsOrAdded = false;

351

List<SequenceI> thisSeqs = proteinAlignment.getSequences();

352

for (SequenceI aaSeq : thisSeqs)

353

{

354

boolean proteinMapped = false;

355

AlignedCodonFrame acf = new AlignedCodonFrame();

356

357

for (SequenceI cdnaSeq : cdnaAlignment.getSequences())

358

{

359

360

* Always try to map if sequences have xref to each other; this supports

361

* variant cDNA or alternative splicing for a protein sequence.

362

363

* If no xrefs, try to map progressively, assuming that alignments have

364

* mappable sequences in corresponding order. These are not

365

* many-to-many, as that would risk mixing species with similar cDNA

366

* sequences.

367

368

if (xrefsOnly && !AlignmentUtils.haveCrossRef(aaSeq, cdnaSeq))

{

continue;

}

* Don't map non-xrefd sequences more than once each. This heuristic

375

* allows us to pair up similar sequences in ordered alignments.

376

377

if (!xrefsOnly && (mappedProtein.contains(aaSeq)

378

|| mappedDna.contains(cdnaSeq)))

{

continue;

}

if (mappingExists(proteinAlignment.getCodonFrames(),

383

aaSeq.getDatasetSequence(), cdnaSeq.getDatasetSequence()))

384

{

385

mappingExistsOrAdded = true;

}

else

{

MapList map = mapCdnaToProtein(aaSeq, cdnaSeq);

390

if (map != null)

391

{

392

acf.addMap(cdnaSeq, aaSeq, map);

393

mappingExistsOrAdded = true;

394

proteinMapped = true;

395

mappedDna.add(cdnaSeq);

396

mappedProtein.add(aaSeq);

}

}

}

if (proteinMapped)

{

proteinAlignment.addCodonFrame(acf);

403

}

404

}

405

return mappingExistsOrAdded;

}

/**

* Answers true if the mappings include one between the given (dataset)

410

* sequences.

411

412

protected static boolean mappingExists(List<AlignedCodonFrame> mappings,

413

SequenceI aaSeq, SequenceI cdnaSeq)

414

{

415

if (mappings != null)

416

{

417

for (AlignedCodonFrame acf : mappings)

418

{

419

if (cdnaSeq == acf.getDnaForAaSeq(aaSeq))

{

return true;

}

}

}

return false;

}

/**

* Builds a mapping (if possible) of a cDNA to a protein sequence.

430

* <ul>

431

* <li>first checks if the cdna translates exactly to the protein

432

* sequence</li>

433

* <li>else checks for translation after removing a STOP codon</li>

434

* <li>else checks for translation after removing a START codon</li>

435

* <li>if that fails, inspect CDS features on the cDNA sequence</li>

436

* </ul>

437

* Returns null if no mapping is determined.

438

439

* @param proteinSeq

440

* the aligned protein sequence

441

* @param cdnaSeq

442

* the aligned cdna sequence

443

* @return

444

445

public static MapList mapCdnaToProtein(SequenceI proteinSeq,

SequenceI cdnaSeq)

{

* Here we handle either dataset sequence set (desktop) or absent (applet).

450

* Use only the char[] form of the sequence to avoid creating possibly large

451

* String objects.

452

453

final SequenceI proteinDataset = proteinSeq.getDatasetSequence();

454

char[] aaSeqChars = proteinDataset != null

455

? proteinDataset.getSequence()

456

: proteinSeq.getSequence();

457

final SequenceI cdnaDataset = cdnaSeq.getDatasetSequence();

458

char[] cdnaSeqChars = cdnaDataset != null ? cdnaDataset.getSequence()

459

: cdnaSeq.getSequence();

460

if (aaSeqChars == null || cdnaSeqChars == null)

{

return null;

}

* cdnaStart/End, proteinStartEnd are base 1 (for dataset sequence mapping)

467

468

final int mappedLength = CODON_LENGTH * aaSeqChars.length;

469

int cdnaLength = cdnaSeqChars.length;

470

int cdnaStart = cdnaSeq.getStart();

471

int cdnaEnd = cdnaSeq.getEnd();

472

final int proteinStart = proteinSeq.getStart();

473

final int proteinEnd = proteinSeq.getEnd();

474

475

476

* If lengths don't match, try ignoring stop codon (if present)

477

478

if (cdnaLength != mappedLength && cdnaLength > 2)

479

{

480

String lastCodon = String.valueOf(cdnaSeqChars,

481

cdnaLength - CODON_LENGTH, CODON_LENGTH)

482

.toUpperCase(Locale.ROOT);

483

for (String stop : ResidueProperties.STOP_CODONS)

484

{

485

if (lastCodon.equals(stop))

486

{

487

cdnaEnd -= CODON_LENGTH;

488

cdnaLength -= CODON_LENGTH;

break;

}

}

}

* If lengths still don't match, try ignoring start codon.

496

497

int startOffset = 0;

498

if (cdnaLength != mappedLength && cdnaLength > 2

499

&& String.valueOf(cdnaSeqChars, 0, CODON_LENGTH)

500

.toUpperCase(Locale.ROOT)

501

.equals(ResidueProperties.START))

502

{

503

startOffset += CODON_LENGTH;

504

cdnaStart += CODON_LENGTH;

505

cdnaLength -= CODON_LENGTH;

506

}

507

508

if (translatesAs(cdnaSeqChars, startOffset, aaSeqChars))

509

{

510

511

* protein is translation of dna (+/- start/stop codons)

512

513

MapList map = new MapList(new int[] { cdnaStart, cdnaEnd },

514

new int[]

515

{ proteinStart, proteinEnd }, CODON_LENGTH, 1);

return map;

}

* translation failed - try mapping CDS annotated regions of dna

521

522

return mapCdsToProtein(cdnaSeq, proteinSeq);

}

/**

* Test whether the given cdna sequence, starting at the given offset,

527

* translates to the given amino acid sequence, using the standard translation

528

* table. Designed to fail fast i.e. as soon as a mismatch position is found.

529

530

* @param cdnaSeqChars

* @param cdnaStart

* @param aaSeqChars

* @return

protected static boolean translatesAs(char[] cdnaSeqChars, int cdnaStart,

536

char[] aaSeqChars)

537

{

538

if (cdnaSeqChars == null || aaSeqChars == null)

{

return false;

}

int aaPos = 0;

int dnaPos = cdnaStart;

545

161

for (; dnaPos < cdnaSeqChars.length - 2

546

&& aaPos < aaSeqChars.length; dnaPos += CODON_LENGTH, aaPos++)

547

{

548

130

String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);

549

130

final String translated = ResidueProperties.codonTranslate(codon);

550

551

552

* allow * in protein to match untranslatable in dna

553

554

130

final char aaRes = aaSeqChars[aaPos];

555

130

if ((translated == null || ResidueProperties.STOP.equals(translated))

&& aaRes == '*')

{

continue;

}

126

if (translated == null || !(aaRes == translated.charAt(0)))

561

{

562

// debug

563

// jalview.bin.Console.outPrintln(("Mismatch at " + i + "/" + aaResidue

564

// + ": "

565

// + codon + "(" + translated + ") != " + aaRes));

return false;

}

}

* check we matched all of the protein sequence

572

573

if (aaPos != aaSeqChars.length)

{

return false;

}

* check we matched all of the dna except

580

* for optional trailing STOP codon

581

582

if (dnaPos == cdnaSeqChars.length)

{

return true;

}

if (dnaPos == cdnaSeqChars.length - CODON_LENGTH)

587

{

588

String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);

589

if (ResidueProperties.STOP

590

.equals(ResidueProperties.codonTranslate(codon)))

{

return true;

}

}

return false;

}

/**

* Align sequence 'seq' to match the alignment of a mapped sequence. Note this

600

* currently assumes that we are aligning cDNA to match protein.

601

602

* @param seq

603

* the sequence to be realigned

604

* @param al

605

* the alignment whose sequence alignment is to be 'copied'

606

* @param gap

607

* character string represent a gap in the realigned sequence

608

* @param preserveUnmappedGaps

609

* @param preserveMappedGaps

610

* @return true if the sequence was realigned, false if it could not be

611

612

public static boolean alignSequenceAs(SequenceI seq, AlignmentI al,

613

String gap, boolean preserveMappedGaps,

614

boolean preserveUnmappedGaps)

615

{

616

617

* Get any mappings from the source alignment to the target (dataset)

618

* sequence.

619

620

// TODO there may be one AlignedCodonFrame per dataset sequence, or one with

621

// all mappings. Would it help to constrain this?

622

List<AlignedCodonFrame> mappings = al.getCodonFrame(seq);

623

if (mappings == null || mappings.isEmpty())

{

return false;

}

* Locate the aligned source sequence whose dataset sequence is mapped. We

630

* just take the first match here (as we can't align like more than one

631

* sequence).

632

633

SequenceI alignFrom = null;

634

AlignedCodonFrame mapping = null;

635

for (AlignedCodonFrame mp : mappings)

636

{

637

alignFrom = mp.findAlignedSequence(seq, al);

638

if (alignFrom != null)

{

mapping = mp;

break;

}

}

if (alignFrom == null)

{

return false;

}

alignSequenceAs(seq, alignFrom, mapping, gap, al.getGapCharacter(),

650

preserveMappedGaps, preserveUnmappedGaps);

return true;

}

/**
 * Align sequence 'alignTo' the same way as 'alignFrom', using the mapping to
 * match residues and codons. Flags control whether existing gaps in unmapped
 * (intron) and mapped (exon) regions are preserved or not. Gaps between
 * intron and exon are only retained if both flags are set.
 * <p>
 * The rebuilt sequence is written back with {@code alignTo.setSequence}.
 * 
 * @param alignTo
 *          the sequence to realign (modified in place)
 * @param alignFrom
 *          the aligned 'model' sequence whose gaps are copied
 * @param mapping
 *          the mapping queried (via getMappedRegion) for each non-gap
 *          position of alignFrom
 * @param myGap
 *          gap string for alignTo; its first character is the gap character
 *          and its length is the number of gaps written per source gap
 * @param sourceGap
 *          the gap character in alignFrom
 * @param preserveMappedGaps
 *          if true, keep existing gaps of alignTo within mapped (exon)
 *          regions
 * @param preserveUnmappedGaps
 *          if true, keep existing gaps of alignTo within unmapped (intron)
 *          regions
 */
public static void alignSequenceAs(SequenceI alignTo, SequenceI alignFrom,
        AlignedCodonFrame mapping, String myGap, char sourceGap,
        boolean preserveMappedGaps, boolean preserveUnmappedGaps)
{
  // TODO generalise to work for Protein-Protein, dna-dna, dna-protein

  // aligned and dataset sequence positions, all base zero
  int thisSeqPos = 0;
  int sourceDsPos = 0;

  // count of non-gap characters of alignTo consumed so far
  int basesWritten = 0;
  char myGapChar = myGap.charAt(0);
  // gaps to write in alignTo for each gap (or unmapped residue) in alignFrom
  int ratio = myGap.length();

  int fromOffset = alignFrom.getStart() - 1;
  int toOffset = alignTo.getStart() - 1;
  // pending gap length owed to alignTo because of gaps seen in alignFrom
  int sourceGapMappedLength = 0;
  boolean inExon = false;
  final int toLength = alignTo.getLength();
  final int fromLength = alignFrom.getLength();
  StringBuilder thisAligned = new StringBuilder(2 * toLength);

  /*
   * Traverse the 'model' aligned sequence
   */
  for (int i = 0; i < fromLength; i++)
  {
    char sourceChar = alignFrom.getCharAt(i);
    if (sourceChar == sourceGap)
    {
      sourceGapMappedLength += ratio;
      continue;
    }

    /*
     * Found a non-gap character. Locate its mapped region if any.
     */
    sourceDsPos++;
    // Note mapping positions are base 1, our sequence positions base 0
    int[] mappedPos = mapping.getMappedRegion(alignTo, alignFrom,
            sourceDsPos + fromOffset);
    if (mappedPos == null)
    {
      /*
       * unmapped position; treat like a gap
       */
      sourceGapMappedLength += ratio;
      // jalview.bin.Console.errPrintln("Can't align: no codon mapping to
      // residue "
      // + sourceDsPos + "(" + sourceChar + ")");
      // return;
      continue;
    }

    int mappedCodonStart = mappedPos[0]; // position (1...) of codon start
    int mappedCodonEnd = mappedPos[mappedPos.length - 1]; // codon end pos
    // gaps of alignTo read since the last base, not yet written out
    StringBuilder trailingCopiedGap = new StringBuilder();

    /*
     * Copy dna sequence up to and including this codon. Optionally, include
     * gaps before the codon starts (in introns) and/or after the codon starts
     * (in exons).
     * 
     * Note this only works for 'linear' splicing, not reverse or interleaved.
     * But then 'align dna as protein' doesn't make much sense otherwise.
     */
    int intronLength = 0;
    while (basesWritten + toOffset < mappedCodonEnd
            && thisSeqPos < toLength)
    {
      final char c = alignTo.getCharAt(thisSeqPos++);
      if (c != myGapChar)
      {
        basesWritten++;
        int sourcePosition = basesWritten + toOffset;
        if (sourcePosition < mappedCodonStart)
        {
          /*
           * Found an unmapped (intron) base. First add in any preceding gaps
           * (if wanted).
           */
          if (preserveUnmappedGaps && trailingCopiedGap.length() > 0)
          {
            thisAligned.append(trailingCopiedGap.toString());
            intronLength += trailingCopiedGap.length();
            trailingCopiedGap = new StringBuilder();
          }
          intronLength++;
          inExon = false;
        }
        else
        {
          // base lies within the mapped codon: work out how many gaps
          // must precede it, then clear the pending source gap count
          final boolean startOfCodon = sourcePosition == mappedCodonStart;
          int gapsToAdd = calculateGapsToInsert(preserveMappedGaps,
                  preserveUnmappedGaps, sourceGapMappedLength, inExon,
                  trailingCopiedGap.length(), intronLength, startOfCodon);
          for (int k = 0; k < gapsToAdd; k++)
          {
            thisAligned.append(myGapChar);
          }
          sourceGapMappedLength = 0;
          inExon = true;
        }
        thisAligned.append(c);
        trailingCopiedGap = new StringBuilder();
      }
      else
      {
        // a gap in alignTo: remember it only if gaps of this region type
        // (exon or intron) are being preserved
        if (inExon && preserveMappedGaps)
        {
          trailingCopiedGap.append(myGapChar);
        }
        else if (!inExon && preserveUnmappedGaps)
        {
          trailingCopiedGap.append(myGapChar);
        }
      }
    }
  }

  /*
   * At end of model aligned sequence. Copy any remaining target sequence, optionally
   * including (intron) gaps.
   */
  while (thisSeqPos < toLength)
  {
    final char c = alignTo.getCharAt(thisSeqPos++);
    if (c != myGapChar || preserveUnmappedGaps)
    {
      thisAligned.append(c);
    }
    // each remaining column read reduces the trailing padding still owed
    sourceGapMappedLength--;
  }

  /*
   * finally add gaps to pad for any trailing source gaps or
   * unmapped characters
   */
  if (preserveUnmappedGaps)
  {
    while (sourceGapMappedLength > 0)
    {
      thisAligned.append(myGapChar);
      sourceGapMappedLength--;
    }
  }

  /*
   * All done aligning, set the aligned sequence.
   */
  alignTo.setSequence(new String(thisAligned));
}

/**

* Helper method to work out how many gaps to insert when realigning.

823

824

* @param preserveMappedGaps

825

* @param preserveUnmappedGaps

826

* @param sourceGapMappedLength

827

* @param inExon

828

* @param trailingCopiedGap

829

* @param intronLength

830

* @param startOfCodon

831

* @return

832

833

protected static int calculateGapsToInsert(boolean preserveMappedGaps,

834

boolean preserveUnmappedGaps, int sourceGapMappedLength,

835

boolean inExon, int trailingGapLength, int intronLength,

836

final boolean startOfCodon)

{

int gapsToAdd = 0;

if (startOfCodon)

{

* Reached start of codon. Ignore trailing gaps in intron unless we are

843

* preserving gaps in both exon and intron. Ignore them anyway if the

844

* protein alignment introduces a gap at least as large as the intronic

845

* region.

846

847

if (inExon && !preserveMappedGaps)

848

{

849

trailingGapLength = 0;

850

}

851

if (!inExon && !(preserveMappedGaps && preserveUnmappedGaps))

852

{

853

trailingGapLength = 0;

}

if (inExon)

{

gapsToAdd = Math.max(sourceGapMappedLength, trailingGapLength);

}

else

{

if (intronLength + trailingGapLength <= sourceGapMappedLength)

862

{

863

gapsToAdd = sourceGapMappedLength - intronLength;

}

else

{

gapsToAdd = Math.min(

868

intronLength + trailingGapLength - sourceGapMappedLength,

trailingGapLength);

}

}

}

else

{

* second or third base of codon; check for any gaps in dna

877

878

if (!preserveMappedGaps)

879

{

880

trailingGapLength = 0;

881

}

882

gapsToAdd = Math.max(sourceGapMappedLength, trailingGapLength);

}

return gapsToAdd;

}

/**

* Realigns the given protein to match the alignment of the dna, using codon

889

* mappings to translate aligned codon positions to protein residues.

890

891

* @param protein

892

* the alignment whose sequences are realigned by this method

893

* @param dna

894

* the dna alignment whose alignment we are 'copying'

895

* @return the number of sequences that were realigned

896

897

public static int alignProteinAsDna(AlignmentI protein, AlignmentI dna)

898

{

899

if (protein.isNucleotide() || !dna.isNucleotide())

900

{

901

jalview.bin.Console

902

.errPrintln("Wrong alignment type in alignProteinAsDna");

903

return 0;

904

}

905

List<SequenceI> unmappedProtein = new ArrayList<>();

906

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons = buildCodonColumnsMap(

907

protein, dna, unmappedProtein);

908

return alignProteinAs(protein, alignedCodons, unmappedProtein);

}

/**

* Realigns the given dna to match the alignment of the protein, using codon

913

* mappings to translate aligned peptide positions to codons.

914

915

* Always produces a padded CDS alignment.

916

917

* @param dna

918

* the alignment whose sequences are realigned by this method

919

* @param protein

920

* the protein alignment whose alignment we are 'copying'

921

* @return the number of sequences that were realigned

922

923

public static int alignCdsAsProtein(AlignmentI dna, AlignmentI protein)

924

{

925

if (protein.isNucleotide() || !dna.isNucleotide())

926

{

927

jalview.bin.Console

928

.errPrintln("Wrong alignment type in alignProteinAsDna");

929

return 0;

930

}

931

// todo: implement this

932

List<AlignedCodonFrame> mappings = protein.getCodonFrames();

933

int alignedCount = 0;

934

int width = 0; // alignment width for padding CDS

935

for (SequenceI dnaSeq : dna.getSequences())

936

{

937

if (alignCdsSequenceAsProtein(dnaSeq, protein, mappings,

938

dna.getGapCharacter()))

{

alignedCount++;

}

width = Math.max(dnaSeq.getLength(), width);

}

int oldwidth;

int diff;

for (SequenceI dnaSeq : dna.getSequences())

947

{

948

oldwidth = dnaSeq.getLength();

949

diff = width - oldwidth;

950

if (diff > 0)

951

{

952

dnaSeq.insertCharAt(oldwidth, diff, dna.getGapCharacter());

}

}

return alignedCount;

}

/**

* Helper method to align (if possible) the dna sequence to match the

960

* alignment of a mapped protein sequence. This is currently limited to

961

* handling coding sequence only.

* @param cdsSeq

* @param protein

* @param mappings

* @param gapChar

* @return

static boolean alignCdsSequenceAsProtein(SequenceI cdsSeq,

970

AlignmentI protein, List<AlignedCodonFrame> mappings,

971

char gapChar)

972

{

973

SequenceI cdsDss = cdsSeq.getDatasetSequence();

if (cdsDss == null)

{

System.err

.println("alignCdsSequenceAsProtein needs aligned sequence!");

return false;

}

List<AlignedCodonFrame> dnaMappings = MappingUtils

982

.findMappingsForSequence(cdsSeq, mappings);

983

for (AlignedCodonFrame mapping : dnaMappings)

984

{

985

SequenceI peptide = mapping.findAlignedSequence(cdsSeq, protein);

986

if (peptide != null)

987

{

988

final int peptideLength = peptide.getLength();

989

Mapping map = mapping.getMappingBetween(cdsSeq, peptide);

990

if (map != null)

991

{

992

MapList mapList = map.getMap();

993

if (map.getTo() == peptide.getDatasetSequence())

994

{

995

mapList = mapList.getInverse();

996

}

997

final int cdsLength = cdsDss.getLength();

998

int mappedFromLength = MappingUtils

999

.getLength(mapList.getFromRanges());

1000

int mappedToLength = MappingUtils

1001

.getLength(mapList.getToRanges());

1002

boolean addStopCodon = (cdsLength == mappedFromLength

1003

* CODON_LENGTH + CODON_LENGTH)

1004

|| (peptide.getDatasetSequence()

1005

.getLength() == mappedFromLength - 1);

1006

if (cdsLength != mappedToLength && !addStopCodon)

1007

{

1008

jalview.bin.Console.errPrintln(String.format(

1009

"Can't align cds as protein (length mismatch %d/%d): %s",

1010

cdsLength, mappedToLength, cdsSeq.getName()));

}

* pre-fill the aligned cds sequence with gaps

1015

1016

char[] alignedCds = new char[peptideLength * CODON_LENGTH

1017

+ (addStopCodon ? CODON_LENGTH : 0)];

1018

Arrays.fill(alignedCds, gapChar);

1019

1020

1021

* walk over the aligned peptide sequence and insert mapped

1022

* codons for residues in the aligned cds sequence

1023

1024

int copiedBases = 0;

1025

int cdsStart = cdsDss.getStart();

1026

int proteinPos = peptide.getStart() - 1;

1027

int cdsCol = 0;

1028

1029

for (int col = 0; col < peptideLength; col++)

1030

{

1031

char residue = peptide.getCharAt(col);

1032

1033

if (Comparison.isGap(residue))

1034

{

1035

cdsCol += CODON_LENGTH;

}

else

{

proteinPos++;

int[] codon = mapList.locateInTo(proteinPos, proteinPos);

1041

if (codon == null)

1042

{

1043

// e.g. incomplete start codon, X in peptide

1044

cdsCol += CODON_LENGTH;

}

else

{

for (int j = codon[0]; j <= codon[1]; j++)

1049

{

1050

char mappedBase = cdsDss.getCharAt(j - cdsStart);

1051

alignedCds[cdsCol++] = mappedBase;

copiedBases++;

}

}

}

}

* append stop codon if not mapped from protein,

1060

* closing it up to the end of the mapped sequence

1061

1062

if (copiedBases == cdsLength - CODON_LENGTH)

1063

{

1064

for (int i = alignedCds.length - 1; i >= 0; i--)

1065

{

1066

if (!Comparison.isGap(alignedCds[i]))

1067

{

1068

cdsCol = i + 1; // gap just after end of sequence

break;

}

}

for (int i = cdsLength - CODON_LENGTH; i < cdsLength; i++)

1073

{

1074

alignedCds[cdsCol++] = cdsDss.getCharAt(i);

1075

}

1076

}

1077

cdsSeq.setSequence(new String(alignedCds));

return true;

}

}

}

return false;

}

/**

* Builds a map whose key is an aligned codon position (3 alignment column

1087

* numbers base 0), and whose value is a map from protein sequence to each

1088

* protein's peptide residue for that codon. The map generates an ordering of

1089

* the codons, and allows us to read off the peptides at each position in

1090

* order to assemble 'aligned' protein sequences.

1091

1092

* @param protein

1093

* the protein alignment

1094

* @param dna

1095

* the coding dna alignment

1096

* @param unmappedProtein

1097

* any unmapped proteins are added to this list

1098

* @return

1099

1100

protected static Map<AlignedCodon, Map<SequenceI, AlignedCodon>> buildCodonColumnsMap(

1101

AlignmentI protein, AlignmentI dna,

1102

List<SequenceI> unmappedProtein)

1103

{

1104

1105

* maintain a list of any proteins with no mappings - these will be

1106

* rendered 'as is' in the protein alignment as we can't align them

1107

1108

unmappedProtein.addAll(protein.getSequences());

1109

1110

List<AlignedCodonFrame> mappings = protein.getCodonFrames();

1111

1112

1113

* Map will hold, for each aligned codon position e.g. [3, 5, 6], a map of

1114

* {dnaSequence, {proteinSequence, codonProduct}} at that position. The

1115

* comparator keeps the codon positions ordered.

1116

1117

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons = new TreeMap<>(

1118

new CodonComparator());

1119

1120

for (SequenceI dnaSeq : dna.getSequences())

1121

{

1122

for (AlignedCodonFrame mapping : mappings)

1123

{

1124

516

SequenceI prot = mapping.findAlignedSequence(dnaSeq, protein);

1125

516

if (prot != null)

1126

{

1127

Mapping seqMap = mapping.getMappingForSequence(dnaSeq);

1128

addCodonPositions(dnaSeq, prot, protein.getGapCharacter(), seqMap,

1129

alignedCodons);

1130

unmappedProtein.remove(prot);

}

}

}

* Finally add any unmapped peptide start residues (e.g. for incomplete

1137

* codons) as if at the codon position before the second residue

1138

1139

// TODO resolve JAL-2022 so this fudge can be removed

1140

int mappedSequenceCount = protein.getHeight() - unmappedProtein.size();

1141

addUnmappedPeptideStarts(alignedCodons, mappedSequenceCount);

1142

1143

return alignedCodons;

}

/**

* Scans for any protein mapped from position 2 (meaning unmapped start

1148

* position e.g. an incomplete codon), and synthesizes a 'codon' for it at the

1149

* preceding position in the alignment

1150

1151

* @param alignedCodons

1152

* the codon-to-peptide map

1153

* @param mappedSequenceCount

1154

* the number of distinct sequences in the map

1155

1156

protected static void addUnmappedPeptideStarts(

1157

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,

1158

int mappedSequenceCount)

1159

{

1160

// TODO delete this ugly hack once JAL-2022 is resolved

1161

// i.e. we can model startPhase > 0 (incomplete start codon)

1162

1163

List<SequenceI> sequencesChecked = new ArrayList<>();

1164

AlignedCodon lastCodon = null;

1165

Map<SequenceI, AlignedCodon> toAdd = new HashMap<>();

1166

1167

for (Entry<AlignedCodon, Map<SequenceI, AlignedCodon>> entry : alignedCodons

1168

.entrySet())

1169

{

1170

1913

for (Entry<SequenceI, AlignedCodon> sequenceCodon : entry.getValue()

1171

.entrySet())

1172

{

1173

10671

SequenceI seq = sequenceCodon.getKey();

1174

10671

if (sequencesChecked.contains(seq))

1175

{

1176

10641

continue;

1177

}

1178

sequencesChecked.add(seq);

1179

AlignedCodon codon = sequenceCodon.getValue();

1180

if (codon.peptideCol > 1)

1181

{

1182

jalview.bin.Console.errPrintln(

1183

"Problem mapping protein with >1 unmapped start positions: "

1184

+ seq.getName());

1185

}

1186

else if (codon.peptideCol == 1)

1187

{

1188

1189

* first position (peptideCol == 0) was unmapped - add it

1190

1191

if (lastCodon != null)

1192

{

1193

AlignedCodon firstPeptide = new AlignedCodon(lastCodon.pos1,

1194

lastCodon.pos2, lastCodon.pos3,

1195

String.valueOf(seq.getCharAt(0)), 0);

1196

toAdd.put(seq, firstPeptide);

}

else

{

* unmapped residue at start of alignment (no prior column) -

1202

* 'insert' at nominal codon [0, 0, 0]

1203

1204

AlignedCodon firstPeptide = new AlignedCodon(0, 0, 0,

1205

String.valueOf(seq.getCharAt(0)), 0);

1206

toAdd.put(seq, firstPeptide);

1207

}

1208

}

1209

if (sequencesChecked.size() == mappedSequenceCount)

1210

{

1211

// no need to check past first mapped position in all sequences

break;

}

}

1913

lastCodon = entry.getKey();

}

* add any new codons safely after iterating over the map

1220

1221

for (Entry<SequenceI, AlignedCodon> startCodon : toAdd.entrySet())

1222

{

1223

addCodonToMap(alignedCodons, startCodon.getValue(),

1224

startCodon.getKey());

}

}

/**

* Update the aligned protein sequences to match the codon alignments given in

* the map.

* @param protein

* @param alignedCodons

1234

* an ordered map of codon positions (columns), with sequence/peptide

1235

* values present in each column

1236

* @param unmappedProtein

1237

* @return

1238

1239

protected static int alignProteinAs(AlignmentI protein,

1240

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,

1241

List<SequenceI> unmappedProtein)

1242

{

1243

1244

* prefill peptide sequences with gaps

1245

1246

int alignedWidth = alignedCodons.size();

1247

char[] gaps = new char[alignedWidth];

1248

Arrays.fill(gaps, protein.getGapCharacter());

1249

Map<SequenceI, char[]> peptides = new HashMap<>();

1250

for (SequenceI seq : protein.getSequences())

1251

{

1252

if (!unmappedProtein.contains(seq))

1253

{

1254

peptides.put(seq, Arrays.copyOf(gaps, gaps.length));

}

}

* Traverse the codons left to right (as defined by CodonComparator)

1260

* and insert peptides in each column where the sequence is mapped.

1261

* This gives a peptide 'alignment' where residues are aligned if their

1262

* corresponding codons occupy the same columns in the cdna alignment.

1263

1264

int column = 0;

1265

for (AlignedCodon codon : alignedCodons.keySet())

1266

{

1267

1914

final Map<SequenceI, AlignedCodon> columnResidues = alignedCodons

1268

.get(codon);

1269

1914

for (Entry<SequenceI, AlignedCodon> entry : columnResidues.entrySet())

1270

{

1271

10682

char residue = entry.getValue().product.charAt(0);

1272

10682

peptides.get(entry.getKey())[column] = residue;

1273

}

1274

1914

column++;

}

* and finally set the constructed sequences

1279

1280

for (Entry<SequenceI, char[]> entry : peptides.entrySet())

1281

{

1282

entry.getKey().setSequence(new String(entry.getValue()));

}

return 0;

}

/**

* Populate the map of aligned codons by traversing the given sequence

1290

* mapping, locating the aligned positions of mapped codons, and adding those

1291

* positions and their translation products to the map.

1292

1293

* @param dna

1294

* the aligned sequence we are mapping from

1295

* @param protein

1296

* the sequence to be aligned to the codons

1297

* @param gapChar

1298

* the gap character in the dna sequence

1299

* @param seqMap

1300

* a mapping to a sequence translation

1301

* @param alignedCodons

1302

* the map we are building up

1303

1304

static void addCodonPositions(SequenceI dna, SequenceI protein,

1305

char gapChar, Mapping seqMap,

1306

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons)

1307

{

1308

Iterator<AlignedCodon> codons = seqMap.getCodonIterator(dna, gapChar);

1309

1310

1311

* add codon positions, and their peptide translations, to the alignment

1312

* map, while remembering the first codon mapped

1313

1314

10716

while (codons.hasNext())

1315

{

1316

10684

try

1317

{

1318

10684

AlignedCodon codon = codons.next();

1319

10684

addCodonToMap(alignedCodons, codon, protein);

1320

} catch (IncompleteCodonException e)

1321

{

1322

// possible incomplete trailing codon - ignore

1323

} catch (NoSuchElementException e)

1324

{

1325

// possibly peptide lacking STOP

}

}

}

/**

* Helper method to add a codon-to-peptide entry to the aligned codons map

1332

1333

* @param alignedCodons

* @param codon

* @param protein

10690

protected static void addCodonToMap(

1338

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,

1339

AlignedCodon codon, SequenceI protein)

1340

{

1341

10690

Map<SequenceI, AlignedCodon> seqProduct = alignedCodons.get(codon);

1342

10690

if (seqProduct == null)

1343

{

1344

1914

seqProduct = new HashMap<>();

1345

1914

alignedCodons.put(codon, seqProduct);

1346

}

1347

10690

seqProduct.put(protein, codon);

}

/**

* Returns true if a cDNA/Protein mapping either exists, or could be made,

1352

* between at least one pair of sequences in the two alignments. Currently,

1353

* the logic is:

1354

* <ul>

1355

* <li>One alignment must be nucleotide, and the other protein</li>

1356

* <li>At least one pair of sequences must be already mapped, or mappable</li>

1357

* <li>Mappable means the nucleotide translation matches the protein

1358

* sequence</li>

1359

* <li>The translation may ignore start and stop codons if present in the

* nucleotide</li>

* </ul>

* @param al1

* @param al2

* @return

public static boolean isMappable(AlignmentI al1, AlignmentI al2)

1368

{

1369

if (al1 == null || al2 == null)

{

return false;

}

* Require one nucleotide and one protein

1376

1377

if (al1.isNucleotide() == al2.isNucleotide())

{

return false;

}

AlignmentI dna = al1.isNucleotide() ? al1 : al2;

1382

AlignmentI protein = dna == al1 ? al2 : al1;

1383

List<AlignedCodonFrame> mappings = protein.getCodonFrames();

1384

for (SequenceI dnaSeq : dna.getSequences())

1385

{

1386

for (SequenceI proteinSeq : protein.getSequences())

1387

{

1388

if (isMappable(dnaSeq, proteinSeq, mappings))

{

return true;

}

}

}

return false;

}

/**

* Returns true if the dna sequence is mapped, or could be mapped, to the

* protein sequence.

* @param dnaSeq

* @param proteinSeq

* @param mappings

* @return

protected static boolean isMappable(SequenceI dnaSeq,

1407

SequenceI proteinSeq, List<AlignedCodonFrame> mappings)

1408

{

1409

if (dnaSeq == null || proteinSeq == null)

{

return false;

}

SequenceI dnaDs = dnaSeq.getDatasetSequence() == null ? dnaSeq

1415

: dnaSeq.getDatasetSequence();

1416

SequenceI proteinDs = proteinSeq.getDatasetSequence() == null

1417

? proteinSeq

1418

: proteinSeq.getDatasetSequence();

1419

1420

for (AlignedCodonFrame mapping : mappings)

1421

{

1422

if (proteinDs == mapping.getAaForDnaSeq(dnaDs))

{

* already mapped

return true;

}

}

* Just try to make a mapping (it is not yet stored), test whether

1433

* successful.

1434

1435

return mapCdnaToProtein(proteinDs, dnaDs) != null;

}

/**

* Finds any reference annotations associated with the sequences in

1440

* sequenceScope, that are not already added to the alignment, and adds them

1441

* to the 'candidates' map. Also populates a lookup table of annotation

1442

* labels, keyed by calcId, for use in constructing tooltips or the like.

1443

1444

* @param sequenceScope

1445

* the sequences to scan for reference annotations

1446

* @param labelForCalcId

1447

* (optional) map to populate with label for calcId

1448

* @param candidates

1449

* map to populate with annotations for sequence

1450

* @param al

1451

* the alignment to check for presence of annotations

1452

1453

public static void findAddableReferenceAnnotations(

1454

List<SequenceI> sequenceScope, Map<String, String> labelForCalcId,

1455

final Map<SequenceI, List<AlignmentAnnotation>> candidates,

1456

AlignmentI al)

1457

{

1458

if (sequenceScope == null)

{

return;

}

* For each sequence in scope, make a list of any annotations on the

1465

* underlying dataset sequence which are not already on the alignment.

1466

1467

* Add to a map of { alignmentSequence, <List of annotations to add> }

1468

1469

for (SequenceI seq : sequenceScope)

1470

{

1471

SequenceI dataset = seq.getDatasetSequence();

if (dataset == null)

{

continue;

}

AlignmentAnnotation[] datasetAnnotations = dataset.getAnnotation();

1477

if (datasetAnnotations == null)

{

continue;

}

final List<AlignmentAnnotation> result = new ArrayList<>();

1482

for (AlignmentAnnotation dsann : datasetAnnotations)

1483

{

1484

1485

* Find matching annotations on the alignment. If none is found, then

1486

* add this annotation to the list of 'addable' annotations for this

1487

* sequence.

1488

1489

155

final Iterable<AlignmentAnnotation> matchedAlignmentAnnotations = al

1490

.findAnnotations(seq, dsann.getCalcId(), dsann.label);

1491

155

boolean found = false;

1492

155

if (matchedAlignmentAnnotations != null)

1493

{

1494

152

for (AlignmentAnnotation matched : matchedAlignmentAnnotations)

1495

{

1496

135

if (dsann.description.equals(matched.description))

{

found = true;

break;

}

}

}

155

if (!found)

1504

{

1505

101

result.add(dsann);

1506

101

if (labelForCalcId != null)

1507

{

1508

labelForCalcId.put(dsann.getCalcId(), dsann.label);

}

}

}

* Save any addable annotations for this sequence

1514

1515

if (!result.isEmpty())

1516

{

1517

candidates.put(seq, result);

}

}

}

/**

* Adds annotations to the top of the alignment annotations, in the same order

1524

* as their related sequences. If you already have an annotation and want to

1525

* add it to a sequence in an alignment use {@code addReferenceAnnotationTo}

1526

1527

* @param annotations

1528

* the annotations to add

1529

* @param alignment

1530

* the alignment to add them to

1531

* @param selectionGroup

1532

* current selection group - may be null, if provided then any added

1533

* annotation will be trimmed to just those columns in the selection

1534

* group

1535

1536

public static void addReferenceAnnotations(

1537

Map<SequenceI, List<AlignmentAnnotation>> annotations,

1538

final AlignmentI alignment, final SequenceGroup selectionGroup)

1539

{

1540

for (SequenceI seq : annotations.keySet())

1541

{

1542

for (AlignmentAnnotation ann : annotations.get(seq))

1543

{

1544

addReferenceAnnotationTo(alignment, seq, ann, selectionGroup);

}

}

}

public static boolean isSSAnnotationPresent(

1550

Map<SequenceI, List<AlignmentAnnotation>> annotations)

1551

{

1552

1553

for (SequenceI seq : annotations.keySet())

1554

{

1555

if (isSecondaryStructurePresent(

1556

annotations.get(seq).toArray(new AlignmentAnnotation[0])))

{

return true;

}

}

return false;

}

/**

* Make a copy of a reference annotation {@code ann} and add it to an

1566

* alignment sequence {@code seq} in {@code alignment}, optionally limited to

1567

* the extent of {@code selectionGroup}

* @param alignment

* @param seq

* @param ann

* @param selectionGroup

1573

* current selection group - may be null, if provided then any added

1574

* annotation will be trimmed to just those columns in the selection

1575

* group

1576

* @return annotation added to {@code seq and {@code alignment}

1577

1578

public static AlignmentAnnotation addReferenceAnnotationTo(

1579

final AlignmentI alignment, final SequenceI seq,

1580

final AlignmentAnnotation ann, final SequenceGroup selectionGroup)

1581

{

1582

AlignmentAnnotation copyAnn = new AlignmentAnnotation(ann);

1583

int startRes = 0;

1584

int endRes = ann.annotations.length;

1585

if (selectionGroup != null)

1586

{

1587

startRes = -1 + Math.min(seq.getEnd(), Math.max(seq.getStart(),

1588

seq.findPosition(selectionGroup.getStartRes())));

1589

endRes = -1 + Math.min(seq.getEnd(),

1590

seq.findPosition(selectionGroup.getEndRes()));

1591

1592

}

1593

copyAnn.restrict(startRes, endRes + 0);

1594

1595

1596

* Add to the sequence (sets copyAnn.datasetSequence), unless the

1597

* original annotation is already on the sequence.

1598

1599

if (!seq.hasAnnotation(ann))

1600

{

1601

ContactMatrixI cm = seq.getDatasetSequence().getContactMatrixFor(ann);

1602

if (cm != null)

1603

{

1604

seq.addContactListFor(copyAnn, cm);

1605

}

1606

seq.addAlignmentAnnotation(copyAnn);

1607

}

1608

// adjust for gaps

1609

copyAnn.adjustForAlignment();

1610

// add to the alignment and set visible

1611

alignment.addAnnotation(copyAnn);

1612

copyAnn.visible = true;

return copyAnn;

}

/**

* Set visibility of alignment annotations of specified types (labels), for

1619

* specified sequences. This supports controls like "Show all secondary

1620

* structure", "Hide all Temp factor", etc.

1621

1622

* @al the alignment to scan for annotations

1623

* @param types

1624

* the types (labels) of annotations to be updated

1625

* @param forSequences

1626

* if not null, only annotations linked to one of these sequences are

1627

* in scope for update; if null, acts on all sequence annotations

1628

* @param anyType

1629

* if this flag is true, 'types' is ignored (label not checked)

1630

* @param doShow

1631

* if true, set visibility on, else set off

1632

1633

public static void showOrHideSequenceAnnotations(AlignmentI al,

1634

Collection<String> types, List<SequenceI> forSequences,

1635

boolean anyType, boolean doShow)

1636

{

1637

AlignmentAnnotation[] anns = al.getAlignmentAnnotation();

1638

if (anns != null)

1639

{

1640

for (AlignmentAnnotation aa : anns)

1641

{

1642

if (anyType || types.contains(aa.label))

1643

{

1644

if ((aa.sequenceRef != null) && (forSequences == null

1645

|| forSequences.contains(aa.sequenceRef)))

{

aa.visible = doShow;

}

}

}

}

}

/**

* Shows or hides auto calculated annotations for a sequence group.

1656

1657

* @param al

1658

* The alignment object with the annotations.

1659

* @param type

1660

* The type of annotation to show or hide.

1661

* @param selectedGroup

1662

* The sequence group for which the annotations should be shown or

1663

* hidden.

1664

* @param anyType

1665

* If true, all types of annotations will be shown/hidden.

1666

* @param doShow

1667

* If true, the annotations will be shown; if false, annotations will

1668

* be hidden.

1669

1670

public static void showOrHideAutoCalculatedAnnotationsForGroup(

1671

AlignmentI al, String type, SequenceGroup selectedGroup,

1672

boolean anyType, boolean doShow)

1673

{

1674

// Get all alignment annotations

1675

AlignmentAnnotation[] anns = al.getAlignmentAnnotation();

if (anns != null)

{

for (AlignmentAnnotation aa : anns)

1680

{

1681

// Check if anyType is true or if the annotation's label contains the

1682

// specified type (currently for secondary structure consensus)

1683

if ((anyType && aa.label

1684

.startsWith(Constants.SECONDARY_STRUCTURE_CONSENSUS_LABEL))

1685

|| aa.label.startsWith(type))

1686

{

1687

// If the annotation's group reference is not null and matches the

1688

// selected group, update its visibility.

1689

if (aa.groupRef != null && selectedGroup == aa.groupRef)

{

aa.visible = doShow;

}

}

}

}

}

public static AlignmentAnnotation getFirstSequenceAnnotationOfType(

1699

AlignmentI al, int graphType)

1700

{

1701

AlignmentAnnotation[] anns = al.getAlignmentAnnotation();

1702

if (anns != null)

1703

{

1704

for (AlignmentAnnotation aa : anns)

1705

{

1706

if (aa.sequenceRef != null && aa.graph == graphType)

return aa;

}

}

return null;

}

/**

* Returns true if either sequence has a cross-reference to the other

* @param seq1

* @param seq2

* @return

public static boolean haveCrossRef(SequenceI seq1, SequenceI seq2)

1721

{

1722

// Note: moved here from class CrossRef as the latter class has dependencies

1723

// not availability to the applet's classpath

1724

return hasCrossRef(seq1, seq2) || hasCrossRef(seq2, seq1);

}

/**

* Returns true if seq1 has a cross-reference to seq2. Currently this assumes

1729

* that sequence name is structured as Source|AccessionId.

* @param seq1

* @param seq2

* @return

108

public static boolean hasCrossRef(SequenceI seq1, SequenceI seq2)

1736

{

1737

108

if (seq1 == null || seq2 == null)

{

return false;

}

100

String name = seq2.getName();

1742

100

final List<DBRefEntry> xrefs = seq1.getDBRefs();

1743

100

if (xrefs != null)

1744

{

1745

for (int ix = 0, nx = xrefs.size(); ix < nx; ix++)

1746

{

1747

DBRefEntry xref = xrefs.get(ix);

1748

String xrefName = xref.getSource() + "|" + xref.getAccessionId();

1749

// case-insensitive test, consistent with DBRefEntry.equalRef()

1750

if (xrefName.equalsIgnoreCase(name))

{

return true;

}

}

}

return false;

}

/**

* Constructs an alignment consisting of the mapped (CDS) regions in the given

1761

* nucleotide sequences, and updates mappings to match. The CDS sequences are

1762

* added to the original alignment's dataset, which is shared by the new

1763

* alignment. Mappings from nucleotide to CDS, and from CDS to protein, are

1764

* added to the alignment dataset.

1765

1766

* @param dna

1767

* aligned nucleotide (dna or cds) sequences

1768

* @param dataset

1769

* the alignment dataset the sequences belong to

1770

* @param products

1771

* (optional) to restrict results to CDS that map to specified

1772

* protein products

1773

* @return an alignment whose sequences are the cds-only parts of the dna

1774

* sequences (or null if no mappings are found)

1775

1776

public static AlignmentI makeCdsAlignment(SequenceI[] dna,

1777

AlignmentI dataset, SequenceI[] products)

1778

{

1779

if (dataset == null || dataset.getDataset() != null)

1780

{

1781

throw new IllegalArgumentException(

1782

"IMPLEMENTATION ERROR: dataset.getDataset() must be null!");

1783

}

1784

List<SequenceI> foundSeqs = new ArrayList<>();

1785

List<SequenceI> cdsSeqs = new ArrayList<>();

1786

List<AlignedCodonFrame> mappings = dataset.getCodonFrames();

1787

HashSet<SequenceI> productSeqs = null;

1788

if (products != null)

1789

{

1790

productSeqs = new HashSet<>();

1791

for (SequenceI seq : products)

1792

{

1793

productSeqs.add(seq.getDatasetSequence() == null ? seq

1794

: seq.getDatasetSequence());

}

}

* Construct CDS sequences from mappings on the alignment dataset.

1800

* The logic is:

1801

* - find the protein product(s) mapped to from each dna sequence

1802

* - if the mapping covers the whole dna sequence (give or take start/stop

1803

* codon), take the dna as the CDS sequence

1804

* - else search dataset mappings for a suitable dna sequence, i.e. one

1805

* whose whole sequence is mapped to the protein

1806

* - if no sequence found, construct one from the dna sequence and mapping

1807

* (and add it to dataset so it is found if this is repeated)

1808

1809

for (SequenceI dnaSeq : dna)

1810

{

1811

SequenceI dnaDss = dnaSeq.getDatasetSequence() == null ? dnaSeq

1812

: dnaSeq.getDatasetSequence();

1813

1814

List<AlignedCodonFrame> seqMappings = MappingUtils

1815

.findMappingsForSequence(dnaSeq, mappings);

1816

for (AlignedCodonFrame mapping : seqMappings)

1817

{

1818

List<Mapping> mappingsFromSequence = mapping

1819

.getMappingsFromSequence(dnaSeq);

1820

1821

for (Mapping aMapping : mappingsFromSequence)

1822

{

1823

MapList mapList = aMapping.getMap();

1824

if (mapList.getFromRatio() == 1)

1825

{

1826

1827

* not a dna-to-protein mapping (likely dna-to-cds)

continue;

}

* skip if mapping is not to one of the target set of proteins

1834

1835

SequenceI proteinProduct = aMapping.getTo();

1836

if (productSeqs != null && !productSeqs.contains(proteinProduct))

{

continue;

}

* try to locate the CDS from the dataset mappings;

1843

* guard against duplicate results (for the case that protein has

1844

* dbrefs to both dna and cds sequences)

1845

1846

SequenceI cdsSeq = findCdsForProtein(mappings, dnaSeq,

1847

seqMappings, aMapping);

1848

if (cdsSeq != null)

1849

{

1850

if (!foundSeqs.contains(cdsSeq))

1851

{

1852

foundSeqs.add(cdsSeq);

1853

SequenceI derivedSequence = cdsSeq.deriveSequence();

1854

cdsSeqs.add(derivedSequence);

1855

if (!dataset.getSequences().contains(cdsSeq))

1856

{

1857

dataset.addSequence(cdsSeq);

}

}

continue;

}

* didn't find mapped CDS sequence - construct it and add

1865

* its dataset sequence to the dataset

1866

1867

cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping,

1868

dataset).deriveSequence();

1869

// cdsSeq has a name constructed as CDS|<dbref>

1870

// <dbref> will be either the accession for the coding sequence,

1871

// marked in the /via/ dbref to the protein product accession

1872

// or it will be the original nucleotide accession.

1873

SequenceI cdsSeqDss = cdsSeq.getDatasetSequence();

cdsSeqs.add(cdsSeq);

* build the mapping from CDS to protein

1879

1880

List<int[]> cdsRange = Collections

1881

.singletonList(new int[]

1882

{ cdsSeq.getStart(),

1883

cdsSeq.getLength() + cdsSeq.getStart() - 1 });

1884

MapList cdsToProteinMap = new MapList(cdsRange,

1885

mapList.getToRanges(), mapList.getFromRatio(),

1886

mapList.getToRatio());

1887

1888

if (!dataset.getSequences().contains(cdsSeqDss))

1889

{

1890

1891

* if this sequence is a newly created one, add it to the dataset

1892

* and made a CDS to protein mapping (if sequence already exists,

1893

* CDS-to-protein mapping _is_ the transcript-to-protein mapping)

1894

1895

dataset.addSequence(cdsSeqDss);

1896

AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();

1897

cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,

cdsToProteinMap);

* guard against duplicating the mapping if repeating this action

1902

1903

if (!mappings.contains(cdsToProteinMapping))

1904

{

1905

mappings.add(cdsToProteinMapping);

}

}

propagateDBRefsToCDS(cdsSeqDss, dnaSeq.getDatasetSequence(),

1910

proteinProduct, aMapping);

1911

1912

* add another mapping from original 'from' range to CDS

1913

1914

AlignedCodonFrame dnaToCdsMapping = new AlignedCodonFrame();

1915

final MapList dnaToCdsMap = new MapList(mapList.getFromRanges(),

1916

cdsRange, 1, 1);

1917

dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeqDss,

1918

dnaToCdsMap);

1919

if (!mappings.contains(dnaToCdsMapping))

1920

{

1921

mappings.add(dnaToCdsMapping);

}

* transfer dna chromosomal loci (if known) to the CDS

1926

* sequence (via the mapping)

1927

1928

final MapList cdsToDnaMap = dnaToCdsMap.getInverse();

1929

transferGeneLoci(dnaSeq, cdsToDnaMap, cdsSeq);

1930

1931

1932

* add DBRef with mapping from protein to CDS

1933

* (this enables Get Cross-References from protein alignment)

1934

* This is tricky because we can't have two DBRefs with the

1935

* same source and accession, so need a different accession for

1936

* the CDS from the dna sequence

1937

1938

1939

// specific use case:

1940

// Genomic contig ENSCHR:1, contains coding regions for ENSG01,

1941

// ENSG02, ENSG03, with transcripts and products similarly named.

1942

// cannot add distinct dbrefs mapping location on ENSCHR:1 to ENSG01

1943

1944

// JBPNote: ?? can't actually create an example that demonstrates we

1945

// need to

1946

// synthesize an xref.

1947

1948

List<DBRefEntry> primrefs = dnaDss.getPrimaryDBRefs();

1949

for (int ip = 0, np = primrefs.size(); ip < np; ip++)

1950

{

1951

DBRefEntry primRef = primrefs.get(ip);

1952

1953

* create a cross-reference from CDS to the source sequence's

1954

* primary reference and vice versa

1955

1956

String source = primRef.getSource();

1957

String version = primRef.getVersion();

1958

DBRefEntry cdsCrossRef = new DBRefEntry(source,

1959

source + ":" + version, primRef.getAccessionId());

1960

cdsCrossRef

1961

.setMap(new Mapping(dnaDss, new MapList(cdsToDnaMap)));

1962

cdsSeqDss.addDBRef(cdsCrossRef);

1963

1964

dnaSeq.addDBRef(new DBRefEntry(source, version,

1965

cdsSeq.getName(), new Mapping(cdsSeqDss, dnaToCdsMap)));

1966

// problem here is that the cross-reference is synthesized -

1967

// cdsSeq.getName() may be like 'CDS|dnaaccession' or

1968

// 'CDS|emblcdsacc'

1969

// assuming cds version same as dna ?!?

1970

1971

DBRefEntry proteinToCdsRef = new DBRefEntry(source, version,

1972

cdsSeq.getName());

1973

1974

proteinToCdsRef.setMap(

1975

new Mapping(cdsSeqDss, cdsToProteinMap.getInverse()));

1976

proteinProduct.addDBRef(proteinToCdsRef);

1977

}

1978

1979

* transfer any features on dna that overlap the CDS

1980

1981

transferFeatures(dnaSeq, cdsSeq, dnaToCdsMap, null,

1982

SequenceOntologyI.CDS);

}

}

}

AlignmentI cds = new Alignment(

1988

cdsSeqs.toArray(new SequenceI[cdsSeqs.size()]));

1989

cds.setDataset(dataset);

return cds;

}

/**

* Tries to transfer gene loci (dbref to chromosome positions) from fromSeq to

1996

* toSeq, mediated by the given mapping between the sequences

1997

1998

* @param fromSeq

1999

* @param targetToFrom

* Map

* @param targetSeq

protected static void transferGeneLoci(SequenceI fromSeq,

2004

MapList targetToFrom, SequenceI targetSeq)

2005

{

2006

if (targetSeq.getGeneLoci() != null)

2007

{

2008

// already have - don't override

2009

return;

2010

}

2011

GeneLociI fromLoci = fromSeq.getGeneLoci();

2012

if (fromLoci == null)

{

return;

}

MapList newMap = targetToFrom.traverse(fromLoci.getMapping());

if (newMap != null)

{

targetSeq.setGeneLoci(fromLoci.getSpeciesId(),

2022

fromLoci.getAssemblyId(), fromLoci.getChromosomeId(), newMap);

}

}

/**

* A helper method that finds a CDS sequence in the alignment dataset that is

2028

* mapped to the given protein sequence, and either is, or has a mapping from,

2029

* the given dna sequence.

2030

2031

* @param mappings

2032

* set of all mappings on the dataset

2033

* @param dnaSeq

2034

* a dna (or cds) sequence we are searching from

2035

* @param seqMappings

2036

* the set of mappings involving dnaSeq

2037

* @param aMapping

2038

* a transcript-to-peptide mapping

2039

* @return

2040

2041

static SequenceI findCdsForProtein(List<AlignedCodonFrame> mappings,

2042

SequenceI dnaSeq, List<AlignedCodonFrame> seqMappings,

Mapping aMapping)

{

* TODO a better dna-cds-protein mapping data representation to allow easy

2047

* navigation; until then this clunky looping around lists of mappings

2048

2049

SequenceI seqDss = dnaSeq.getDatasetSequence() == null ? dnaSeq

2050

: dnaSeq.getDatasetSequence();

2051

SequenceI proteinProduct = aMapping.getTo();

2052

2053

2054

* is this mapping from the whole dna sequence (i.e. CDS)?

2055

* allowing for possible stop codon on dna but not peptide

2056

2057

int mappedFromLength = MappingUtils

2058

.getLength(aMapping.getMap().getFromRanges());

2059

int dnaLength = seqDss.getLength();

2060

if (mappedFromLength == dnaLength

2061

|| mappedFromLength == dnaLength - CODON_LENGTH)

2062

{

2063

2064

* if sequence has CDS features, this is a transcript with no UTR

2065

* - do not take this as the CDS sequence! (JAL-2789)

2066

2067

if (seqDss.getFeatures().getFeaturesByOntology(SequenceOntologyI.CDS)

.isEmpty())

{

return seqDss;

}

}

* looks like we found the dna-to-protein mapping; search for the

2076

* corresponding cds-to-protein mapping

2077

2078

List<AlignedCodonFrame> mappingsToPeptide = MappingUtils

2079

.findMappingsForSequence(proteinProduct, mappings);

2080

for (AlignedCodonFrame acf : mappingsToPeptide)

2081

{

2082

for (SequenceToSequenceMapping map : acf.getMappings())

2083

{

2084

276

Mapping mapping = map.getMapping();

2085

276

if (mapping != aMapping

2086

&& mapping.getMap().getFromRatio() == CODON_LENGTH

2087

&& proteinProduct == mapping.getTo()

2088

&& seqDss != map.getFromSeq())

2089

{

2090

mappedFromLength = MappingUtils

2091

.getLength(mapping.getMap().getFromRanges());

2092

if (mappedFromLength == map.getFromSeq().getLength())

2093

{

2094

2095

* found a 3:1 mapping to the protein product which covers

2096

* the whole dna sequence i.e. is from CDS; finally check the CDS

2097

* is mapped from the given dna start sequence

2098

2099

SequenceI cdsSeq = map.getFromSeq();

2100

// todo this test is weak if seqMappings contains multiple mappings;

2101

// we get away with it if transcript:cds relationship is 1:1

2102

List<AlignedCodonFrame> dnaToCdsMaps = MappingUtils

2103

.findMappingsForSequence(cdsSeq, seqMappings);

2104

if (!dnaToCdsMaps.isEmpty())

{

return cdsSeq;

}

}

}

}

}

return null;

}

/**

* Helper method that makes a CDS sequence as defined by the mappings from the

2117

* given sequence i.e. extracts the 'mapped from' ranges (which may be on

2118

* forward or reverse strand).

* @param seq

* @param mapping

* @param dataset

* - existing dataset. We check for sequences that look like the CDS

2124

* we are about to construct, if one exists already, then we will

2125

* just return that one.

2126

* @return CDS sequence (as a dataset sequence)

2127

2128

static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping,

AlignmentI dataset)

{

* construct CDS sequence name as "CDS|" with 'from id' held in the mapping

2133

* if set (e.g. EMBL protein_id), else sequence name appended

2134

2135

String mapFromId = mapping.getMappedFromId();

2136

final String seqId = "CDS|"

2137

+ (mapFromId != null ? mapFromId : seq.getName());

2138

2139

SequenceI newSeq = null;

2140

2141

2142

* construct CDS sequence by splicing mapped from ranges

2143

2144

char[] seqChars = seq.getSequence();

2145

List<int[]> fromRanges = mapping.getMap().getFromRanges();

2146

int cdsWidth = MappingUtils.getLength(fromRanges);

2147

char[] newSeqChars = new char[cdsWidth];

2148

2149

int newPos = 0;

2150

for (int[] range : fromRanges)

2151

{

2152

if (range[0] <= range[1])

2153

{

2154

// forward strand mapping - just copy the range

2155

int length = range[1] - range[0] + 1;

2156

System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos,

length);

newPos += length;

}

else

{

// reverse strand mapping - copy and complement one by one

2163

for (int i = range[0]; i >= range[1]; i--)

2164

{

2165

newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]);

}

}

newSeq = new Sequence(seqId, newSeqChars, 1, newPos);

}

if (dataset != null)

{

SequenceI[] matches = dataset.findSequenceMatch(newSeq.getName());

2175

if (matches != null)

2176

{

2177

boolean matched = false;

2178

for (SequenceI mtch : matches)

2179

{

2180

if (mtch.getStart() != newSeq.getStart())

{

continue;

}

if (mtch.getEnd() != newSeq.getEnd())

{

continue;

}

if (!Arrays.equals(mtch.getSequence(), newSeq.getSequence()))

{

continue;

}

if (!matched)

{

matched = true;

newSeq = mtch;

}

else

{

Console.error(

"JAL-2154 regression: warning - found (and ignored) a duplicate CDS sequence:"

+ mtch.toString());

}

}

}

}

// newSeq.setDescription(mapFromId);

return newSeq;

}

/**

* Adds any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to

* the given mapping.

* @param cdsSeq

* @param contig

* @param proteinProduct

2218

* @param mapping

2219

* @return list of DBRefEntrys added

2220

2221

protected static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,

2222

SequenceI contig, SequenceI proteinProduct, Mapping mapping)

2223

{

2224

2225

// gather direct refs from contig congruent with mapping

2226

List<DBRefEntry> direct = new ArrayList<>();

2227

HashSet<String> directSources = new HashSet<>();

2228

2229

List<DBRefEntry> refs = contig.getDBRefs();

2230

if (refs != null)

2231

{

2232

292

for (int ib = 0, nb = refs.size(); ib < nb; ib++)

2233

{

2234

279

DBRefEntry dbr = refs.get(ib);

2235

279

MapList map;

2236

if (dbr.hasMap() && (map = dbr.getMap().getMap()).isTripletMap())

2237

{

2238

// check if map is the CDS mapping

2239

if (mapping.getMap().equals(map))

2240

{

2241

direct.add(dbr);

2242

directSources.add(dbr.getSource());

}

}

}

}

List<DBRefEntry> onSource = DBRefUtils.selectRefs(

2248

proteinProduct.getDBRefs(),

2249

directSources.toArray(new String[0]));

2250

List<DBRefEntry> propagated = new ArrayList<>();

2251

2252

// and generate appropriate mappings

2253

for (int ic = 0, nc = direct.size(); ic < nc; ic++)

2254

{

2255

DBRefEntry cdsref = direct.get(ic);

2256

Mapping m = cdsref.getMap();

2257

// clone maplist and mapping

2258

MapList cdsposmap = new MapList(

2259

Arrays.asList(new int[][]

2260

{ new int[] { cdsSeq.getStart(), cdsSeq.getEnd() } }),

2261

m.getMap().getToRanges(), 3, 1);

2262

Mapping cdsmap = new Mapping(m.getTo(), m.getMap());

2263

2264

// create dbref

2265

DBRefEntry newref = new DBRefEntry(cdsref.getSource(),

2266

cdsref.getVersion(), cdsref.getAccessionId(),

2267

new Mapping(cdsmap.getTo(), cdsposmap));

2268

2269

// and see if we can map to the protein product for this mapping.

2270

// onSource is the filtered set of accessions on protein that we are

2271

// tranferring, so we assume accession is the same.

2272

if (cdsmap.getTo() == null && onSource != null)

2273

{

2274

List<DBRefEntry> sourceRefs = DBRefUtils.searchRefs(onSource,

2275

cdsref.getAccessionId());

2276

if (sourceRefs != null)

2277

{

2278

for (DBRefEntry srcref : sourceRefs)

2279

{

2280

if (srcref.getSource().equalsIgnoreCase(cdsref.getSource()))

2281

{

2282

// we have found a complementary dbref on the protein product, so

2283

// update mapping's getTo

2284

newref.getMap().setTo(proteinProduct);

}

}

}

}

cdsSeq.addDBRef(newref);

2290

propagated.add(newref);

}

return propagated;

}

/**

* Transfers co-located features on 'fromSeq' to 'toSeq', adjusting the

2297

* feature start/end ranges, optionally omitting specified feature types.

2298

* Returns the number of features copied.

* @param fromSeq

* @param toSeq

* @param mapping

* the mapping from 'fromSeq' to 'toSeq'

2304

* @param select

2305

* if not null, only features of this type are copied (including

2306

* subtypes in the Sequence Ontology)

2307

* @param omitting

2308

2309

protected static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,

2310

MapList mapping, String select, String... omitting)

2311

{

2312

SequenceI copyTo = toSeq;

2313

while (copyTo.getDatasetSequence() != null)

2314

{

2315

copyTo = copyTo.getDatasetSequence();

2316

}

2317

if (fromSeq == copyTo || fromSeq.getDatasetSequence() == copyTo)

2318

{

2319

return 0; // shared dataset sequence

}

* get features, optionally restricted by an ontology term

2324

2325

List<SequenceFeature> sfs = select == null

2326

? fromSeq.getFeatures().getPositionalFeatures()

2327

: fromSeq.getFeatures().getFeaturesByOntology(select);

2328

2329

int count = 0;

2330

for (SequenceFeature sf : sfs)

2331

{

2332

9610

String type = sf.getType();

2333

9610

boolean omit = false;

2334

9610

for (String toOmit : omitting)

2335

{

2336

9603

if (type.equals(toOmit))

2337

{

2338

134

omit = true;

2339

}

2340

}

2341

9610

if (omit)

2342

{

2343

134

continue;

}

* locate the mapped range - null if either start or end is

2348

* not mapped (no partial overlaps are calculated)

2349

2350

9476

int start = sf.getBegin();

2351

9476

int end = sf.getEnd();

2352

9476

int[] mappedTo = mapping.locateInTo(start, end);

2353

2354

* if whole exon range doesn't map, try interpreting it

2355

* as 5' or 3' exon overlapping the CDS range

2356

2357

9476

if (mappedTo == null)

2358

{

2359

4447

mappedTo = mapping.locateInTo(end, end);

2360

4447

if (mappedTo != null)

2361

{

2362

2363

* end of exon is in CDS range - 5' overlap

2364

* to a range from the start of the peptide

mappedTo[0] = 1;

}

}

9476

if (mappedTo == null)

2370

{

2371

4447

mappedTo = mapping.locateInTo(start, start);

2372

4447

if (mappedTo != null)

2373

{

2374

2375

* start of exon is in CDS range - 3' overlap

2376

* to a range up to the end of the peptide

2377

2378

mappedTo[1] = toSeq.getLength();

2379

}

2380

}

2381

9476

if (mappedTo != null)

2382

{

2383

5029

int newBegin = Math.min(mappedTo[0], mappedTo[1]);

2384

5029

int newEnd = Math.max(mappedTo[0], mappedTo[1]);

2385

5029

SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd,

2386

sf.getFeatureGroup(), sf.getScore());

2387

5029

copyTo.addSequenceFeature(copy);

2388

5029

count++;

}

}

return count;

}

/**

* Returns a mapping from dna to protein by inspecting sequence features of

2396

* type "CDS" on the dna. A mapping is constructed if the total CDS feature

2397

* length is 3 times the peptide length (optionally after dropping a trailing

2398

* stop codon). This method does not check whether the CDS nucleotide sequence

2399

* translates to the peptide sequence.

* @param dnaSeq

* @param proteinSeq

* @return

public static MapList mapCdsToProtein(SequenceI dnaSeq,

2406

SequenceI proteinSeq)

2407

{

2408

List<int[]> ranges = findCdsPositions(dnaSeq);

2409

int mappedDnaLength = MappingUtils.getLength(ranges);

2410

2411

2412

* if not a whole number of codons, truncate mapping

2413

2414

int codonRemainder = mappedDnaLength % CODON_LENGTH;

2415

if (codonRemainder > 0)

2416

{

2417

mappedDnaLength -= codonRemainder;

2418

MappingUtils.removeEndPositions(codonRemainder, ranges);

2419

}

2420

2421

int proteinLength = proteinSeq.getLength();

2422

int proteinStart = proteinSeq.getStart();

2423

int proteinEnd = proteinSeq.getEnd();

2424

2425

2426

* incomplete start codon may mean X at start of peptide

2427

* we ignore both for mapping purposes

2428

2429

if (proteinSeq.getCharAt(0) == 'X')

2430

{

2431

// todo JAL-2022 support startPhase > 0

proteinStart++;

proteinLength--;

}

List<int[]> proteinRange = new ArrayList<>();

2436

2437

2438

* dna length should map to protein (or protein plus stop codon)

2439

2440

int codesForResidues = mappedDnaLength / CODON_LENGTH;

2441

if (codesForResidues == (proteinLength + 1))

2442

{

2443

// assuming extra codon is for STOP and not in peptide

2444

// todo: check trailing codon is indeed a STOP codon

2445

codesForResidues--;

2446

mappedDnaLength -= CODON_LENGTH;

2447

MappingUtils.removeEndPositions(CODON_LENGTH, ranges);

2448

}

2449

2450

if (codesForResidues == proteinLength)

2451

{

2452

proteinRange.add(new int[] { proteinStart, proteinEnd });

2453

return new MapList(ranges, proteinRange, CODON_LENGTH, 1);

}

return null;

}

/**

* Returns a list of CDS ranges found (as sequence positions base 1), i.e. of

2460

* [start, end] positions of sequence features of type "CDS" (or a sub-type of

2461

* CDS in the Sequence Ontology). The ranges are sorted into ascending start

2462

* position order, so this method is only valid for linear CDS in the same

2463

* sense as the protein product.

* @param dnaSeq

* @return

protected static List<int[]> findCdsPositions(SequenceI dnaSeq)

2469

{

2470

List<int[]> result = new ArrayList<>();

2471

2472

List<SequenceFeature> sfs = dnaSeq.getFeatures()

2473

.getFeaturesByOntology(SequenceOntologyI.CDS);

if (sfs.isEmpty())

{

return result;

}

SequenceFeatures.sortFeatures(sfs, true);

2479

2480

for (SequenceFeature sf : sfs)

{

int phase = 0;

try

{

String s = sf.getPhase();

2486

if (s != null)

2487

{

2488

phase = Integer.parseInt(s);

2489

}

2490

} catch (NumberFormatException e)

{

// leave as zero

}

* phase > 0 on first codon means 5' incomplete - skip to the start

2496

* of the next codon; example ENST00000496384

2497

2498

int begin = sf.getBegin();

2499

int end = sf.getEnd();

2500

if (result.isEmpty() && phase > 0)

{

begin += phase;

if (begin > end)

{

// shouldn't happen!

System.err

.println("Error: start phase extends beyond start CDS in "

+ dnaSeq.getName());

}

}

result.add(new int[] { begin, end });

}

* Finally sort ranges by start position. This avoids a dependency on

2516

* keeping features in order on the sequence (if they are in order anyway,

2517

* the sort will have almost no work to do). The implicit assumption is CDS

2518

* ranges are assembled in order. Other cases should not use this method,

2519

* but instead construct an explicit mapping for CDS (e.g. EMBL parsing).

2520

2521

Collections.sort(result, IntRangeComparator.ASCENDING);

return result;

}

/**

* Makes an alignment with a copy of the given sequences, adding in any

2527

* non-redundant sequences which are mapped to by the cross-referenced

* sequences.

* @param seqs

* @param xrefs

* @param dataset

* the alignment dataset shared by the new copy

2534

* @return

2535

2536

public static AlignmentI makeCopyAlignment(SequenceI[] seqs,

2537

SequenceI[] xrefs, AlignmentI dataset)

2538

{

2539

AlignmentI copy = new Alignment(new Alignment(seqs));

2540

copy.setDataset(dataset);

2541

boolean isProtein = !copy.isNucleotide();

2542

SequenceIdMatcher matcher = new SequenceIdMatcher(seqs);

2543

if (xrefs != null)

2544

{

2545

// BH 2019.01.25 recoded to remove iterators

2546

2547

for (int ix = 0, nx = xrefs.length; ix < nx; ix++)

2548

{

2549

SequenceI xref = xrefs[ix];

2550

List<DBRefEntry> dbrefs = xref.getDBRefs();

2551

if (dbrefs != null)

2552

{

2553

for (int ir = 0, nir = dbrefs.size(); ir < nir; ir++)

2554

{

2555

DBRefEntry dbref = dbrefs.get(ir);

2556

Mapping map = dbref.getMap();

2557

SequenceI mto;

2558

if (map == null || (mto = map.getTo()) == null

2559

|| mto.isProtein() != isProtein)

{

continue;

}

SequenceI mappedTo = mto;

2564

SequenceI match = matcher.findIdMatch(mappedTo);

2565

if (match == null)

2566

{

2567

matcher.add(mappedTo);

2568

copy.addSequence(mappedTo);

}

}

}

}

}

return copy;

}

/**

* Try to align sequences in 'unaligned' to match the alignment of their

2579

* mapped regions in 'aligned'. For example, could use this to align CDS

2580

* sequences which are mapped to their parent cDNA sequences.

2581

2582

* This method handles 1:1 mappings (dna-to-dna or protein-to-protein). For

2583

* dna-to-protein or protein-to-dna use alternative methods.

2584

2585

* @param unaligned

2586

* sequences to be aligned

2587

* @param aligned

2588

* holds aligned sequences and their mappings

2589

* @return

2590

2591

public static int alignAs(AlignmentI unaligned, AlignmentI aligned)

2592

{

2593

2594

* easy case - aligning a copy of aligned sequences

2595

2596

if (alignAsSameSequences(unaligned, aligned))

2597

{

2598

return unaligned.getHeight();

}

* fancy case - aligning via mappings between sequences

2603

2604

List<SequenceI> unmapped = new ArrayList<>();

2605

Map<Integer, Map<SequenceI, Character>> columnMap = buildMappedColumnsMap(

2606

unaligned, aligned, unmapped);

2607

int width = columnMap.size();

2608

char gap = unaligned.getGapCharacter();

2609

int realignedCount = 0;

2610

// TODO: verify this loop scales sensibly for very wide/high alignments

2611

2612

for (SequenceI seq : unaligned.getSequences())

2613

{

2614

if (!unmapped.contains(seq))

2615

{

2616

char[] newSeq = new char[width];

2617

Arrays.fill(newSeq, gap); // JBPComment - doubt this is faster than the

2618

// Integer iteration below

int newCol = 0;

int lastCol = 0;

* traverse the map to find columns populated

2624

* by our sequence

2625

2626

for (Integer column : columnMap.keySet())

2627

{

2628

58976

Character c = columnMap.get(column).get(seq);

2629

58976

if (c != null)

2630

{

2631

2632

* sequence has a character at this position

2633

2634

2635

31986

newSeq[newCol] = c;

2636

31986

lastCol = newCol;

2637

}

2638

58976

newCol++;

}

* trim trailing gaps

if (lastCol < width)

{

char[] tmp = new char[lastCol + 1];

2647

System.arraycopy(newSeq, 0, tmp, 0, lastCol + 1);

2648

newSeq = tmp;

2649

}

2650

// TODO: optimise SequenceI to avoid char[]->String->char[]

2651

seq.setSequence(String.valueOf(newSeq));

realignedCount++;

}

}

return realignedCount;

}

/**

* If unaligned and aligned sequences share the same dataset sequences, then

2660

* simply copies the aligned sequences to the unaligned sequences and returns

2661

* true; else returns false

2662

2663

* @param unaligned

2664

* - sequences to be aligned based on aligned

2665

* @param aligned

2666

* - 'guide' alignment containing sequences derived from same dataset

* as unaligned

* @return

static boolean alignAsSameSequences(AlignmentI unaligned,

2671

AlignmentI aligned)

2672

{

2673

if (aligned.getDataset() == null || unaligned.getDataset() == null)

2674

{

2675

return false; // should only pass alignments with datasets here

2676

}

2677

2678

// map from dataset sequence to alignment sequence(s)

2679

Map<SequenceI, List<SequenceI>> alignedDatasets = new HashMap<>();

2680

for (SequenceI seq : aligned.getSequences())

2681

{

2682

SequenceI ds = seq.getDatasetSequence();

2683

if (alignedDatasets.get(ds) == null)

2684

{

2685

alignedDatasets.put(ds, new ArrayList<SequenceI>());

2686

}

2687

alignedDatasets.get(ds).add(seq);

}

* first pass - check whether all sequences to be aligned share a

2692

* dataset sequence with an aligned sequence; also note the leftmost

2693

* ungapped column from which to copy

2694

2695

int leftmost = Integer.MAX_VALUE;

2696

for (SequenceI seq : unaligned.getSequences())

2697

{

2698

final SequenceI ds = seq.getDatasetSequence();

2699

if (!alignedDatasets.containsKey(ds))

{

return false;

}

SequenceI alignedSeq = alignedDatasets.get(ds).get(0);

2704

int startCol = alignedSeq.findIndex(seq.getStart()); // 1..

2705

leftmost = Math.min(leftmost, startCol);

}

* second pass - copy aligned sequences;

2710

* heuristic rule: pair off sequences in order for the case where

2711

* more than one shares the same dataset sequence

2712

2713

final char gapCharacter = aligned.getGapCharacter();

2714

for (SequenceI seq : unaligned.getSequences())

2715

{

2716

List<SequenceI> alignedSequences = alignedDatasets

2717

.get(seq.getDatasetSequence());

2718

if (alignedSequences.isEmpty())

2719

{

2720

2721

* defensive check - shouldn't happen! (JAL-3536)

continue;

}

SequenceI alignedSeq = alignedSequences.get(0);

2726

2727

2728

* gap fill for leading (5') UTR if any

2729

2730

// TODO this copies intron columns - wrong!

2731

int startCol = alignedSeq.findIndex(seq.getStart()); // 1..

2732

int endCol = alignedSeq.findIndex(seq.getEnd());

2733

char[] seqchars = new char[endCol - leftmost + 1];

2734

Arrays.fill(seqchars, gapCharacter);

2735

char[] toCopy = alignedSeq.getSequence(startCol - 1, endCol);

2736

System.arraycopy(toCopy, 0, seqchars, startCol - leftmost,

2737

toCopy.length);

2738

seq.setSequence(String.valueOf(seqchars));

2739

if (alignedSequences.size() > 0)

2740

{

2741

// pop off aligned sequences (except the last one)

2742

alignedSequences.remove(0);

}

}

* finally remove gapped columns (e.g. introns)

2748

2749

new RemoveGapColCommand("", unaligned.getSequencesArray(), 0,

2750

unaligned.getWidth() - 1, unaligned);

return true;

}

/**

* Returns a map whose key is alignment column number (base 1), and whose

2757

* values are a map of sequence characters in that column.

* @param unaligned

* @param aligned

* @param unmapped

* @return

static SortedMap<Integer, Map<SequenceI, Character>> buildMappedColumnsMap(

2765

AlignmentI unaligned, AlignmentI aligned,

2766

List<SequenceI> unmapped)

2767

{

2768

2769

* Map will hold, for each aligned column position, a map of

2770

* {unalignedSequence, characterPerSequence} at that position.

2771

* TreeMap keeps the entries in ascending column order.

2772

2773

SortedMap<Integer, Map<SequenceI, Character>> map = new TreeMap<>();

2774

2775

2776

* record any sequences that have no mapping so can't be realigned

2777

2778

unmapped.addAll(unaligned.getSequences());

2779

2780

List<AlignedCodonFrame> mappings = aligned.getCodonFrames();

2781

2782

for (SequenceI seq : unaligned.getSequences())

2783

{

2784

for (AlignedCodonFrame mapping : mappings)

2785

{

2786

510

SequenceI fromSeq = mapping.findAlignedSequence(seq, aligned);

2787

510

if (fromSeq != null)

2788

{

2789

Mapping seqMap = mapping.getMappingBetween(fromSeq, seq);

2790

if (addMappedPositions(seq, fromSeq, seqMap, map))

2791

{

2792

unmapped.remove(seq);

}

}

}

}

return map;

}

/**

* Helper method that adds to a map the mapped column positions of a sequence.

2802

* <br>

2803

* For example if aaTT-Tg-gAAA is mapped to TTTAAA then the map should record

2804

* that columns 3,4,6,10,11,12 map to characters T,T,T,A,A,A of the mapped to

* sequence.

* @param seq

* the sequence whose column positions we are recording

2809

* @param fromSeq

2810

* a sequence that is mapped to the first sequence

2811

* @param seqMap

2812

* the mapping from 'fromSeq' to 'seq'

2813

* @param map

2814

* a map to add the column positions (in fromSeq) of the mapped

* positions of seq

* @return

static boolean addMappedPositions(SequenceI seq, SequenceI fromSeq,

2819

Mapping seqMap, Map<Integer, Map<SequenceI, Character>> map)

{

if (seqMap == null)

{

return false;

}

* invert mapping if it is from unaligned to aligned sequence

2828

2829

if (seqMap.getTo() == fromSeq.getDatasetSequence())

2830

{

2831

seqMap = new Mapping(seq.getDatasetSequence(),

2832

seqMap.getMap().getInverse());

2833

}

2834

2835

int toStart = seq.getStart();

2836

2837

2838

* traverse [start, end, start, end...] ranges in fromSeq

2839

2840

for (int[] fromRange : seqMap.getMap().getFromRanges())

2841

{

2842

for (int i = 0; i < fromRange.length - 1; i += 2)

2843

{

2844

boolean forward = fromRange[i + 1] >= fromRange[i];

2845

2846

2847

* find the range mapped to (sequence positions base 1)

2848

2849

int[] range = seqMap.locateMappedRange(fromRange[i],

fromRange[i + 1]);

if (range == null)

{

jalview.bin.Console.errPrintln("Error in mapping " + seqMap

2854

+ " from " + fromSeq.getName());

2855

return false;

2856

}

2857

int fromCol = fromSeq.findIndex(fromRange[i]);

2858

int mappedCharPos = range[0];

2859

2860

2861

* walk over the 'from' aligned sequence in forward or reverse

2862

* direction; when a non-gap is found, record the column position

2863

* of the next character of the mapped-to sequence; stop when all

2864

* the characters of the range have been counted

2865

2866

2794274

while (mappedCharPos <= range[1] && fromCol <= fromSeq.getLength()

2867

&& fromCol >= 0)

2868

{

2869

2794243

if (!Comparison.isGap(fromSeq.getCharAt(fromCol - 1)))

2870

{

2871

2872

* mapped from sequence has a character in this column

2873

* record the column position for the mapped to character

2874

2875

31998

Map<SequenceI, Character> seqsMap = map.get(fromCol);

2876

31998

if (seqsMap == null)

2877

{

2878

5398

seqsMap = new HashMap<>();

2879

5398

map.put(fromCol, seqsMap);

2880

}

2881

31998

seqsMap.put(seq, seq.getCharAt(mappedCharPos - toStart));

2882

31998

mappedCharPos++;

2883

}

2884

2794243

fromCol += (forward ? 1 : -1);

}

}

}

return true;

}

// strictly temporary hack until proper criteria for aligning protein to cds

2892

// are in place; this is so Ensembl -> fetch xrefs Uniprot aligns the Uniprot

2893

public static boolean looksLikeEnsembl(AlignmentI alignment)

2894

{

2895

for (SequenceI seq : alignment.getSequences())

2896

{

2897

String name = seq.getName();

2898

if (!name.startsWith("ENSG") && !name.startsWith("ENST"))

{

return false;

}

}

return true;

}

public static boolean isSecondaryStructurePresent(

2907

AlignmentAnnotation[] annotations)

2908

{

2909

boolean ssPresent = false;

2910

2911

for (AlignmentAnnotation aa : annotations)

{

if (ssPresent)

{

break;

}

if (Constants.SECONDARY_STRUCTURE_LABELS.containsKey(aa.label))

{

ssPresent = true;

break;

}

}

return ssPresent;

}

public static Color getSecondaryStructureAnnotationColour(char symbol)

2930

{

2931

2932

if (symbol == Constants.COIL)

{

return Color.gray;

}

if (symbol == Constants.SHEET)

{

return Color.green;

}

if (symbol == Constants.HELIX)

{

return Color.red;

}

return Color.white;

}

70935

public static char findSSAnnotationForGivenSeqposition(

2949

AlignmentAnnotation aa, int seqPosition)

2950

{

2951

70935

char ss = '*';

2952

2953

70936

if (aa != null)

2954

{

2955

70935

if (aa.getAnnotationForPosition(seqPosition) != null)

2956

{

2957

41675

Annotation a = aa.getAnnotationForPosition(seqPosition);

2958

41674

ss = a.secondaryStructure;

2959

2960

// There is no representation for coil and it can be either ' ' or null.

2961

41676

if (ss == ' ' || ss == '-')

2962

{

2963

9746

ss = Constants.COIL;

}

}

else

{

29260

ss = Constants.COIL;

}

}

70936

return ss;

2973

}

2974

2975

1691

public static List<String> extractSSSourceInAlignmentAnnotation(

2976

AlignmentAnnotation[] annotations)

2977

{

2978

2979

1691

List<String> ssSources = new ArrayList<>();

2980

1691

Set<String> addedSources = new HashSet<>(); // to keep track of added

2981

// sources

2982

2983

1691

if (annotations == null)

{

return ssSources;

}

1679

for (AlignmentAnnotation aa : annotations)

2989

{

2990

2991

7873

String ssSource = extractSSSourceFromAnnotationDescription(aa);

2992

2993

7873

if (ssSource != null && !addedSources.contains(ssSource))

2994

{

2995

ssSources.add(ssSource);

2996

addedSources.add(ssSource);

}

}

1679

Collections.sort(ssSources);

3001

3002

1679

return ssSources;

}

62816

public static String extractSSSourceFromAnnotationDescription(

3007

AlignmentAnnotation aa)

3008

{

3009

3010

62816

for (String label : Constants.SECONDARY_STRUCTURE_LABELS.keySet())

3011

{

3012

3013

70055

if (label.equals(aa.label))

3014

{

3015

3016

55577

if (aa.getProperty(Constants.SS_PROVIDER_PROPERTY) != null)

3017

{

3018

3019

return aa.getProperty(Constants.SS_PROVIDER_PROPERTY);

}

// For JPred

55576

if (Constants.SS_ANNOTATION_FROM_JPRED_LABEL.equals(aa.label))

3025

{

3026

3027

return (Constants.SECONDARY_STRUCTURE_LABELS.get(aa.label));

}

// For input with secondary structure

3032

55576

if (Constants.SS_ANNOTATION_LABEL.equals(aa.label)

3033

&& aa.description != null

3034

&& Constants.SS_ANNOTATION_LABEL.equals(aa.description))

3035

{

3036

3037

17328

return (Constants.SECONDARY_STRUCTURE_LABELS.get(aa.label));

}

// For other sources

38248

if (aa.sequenceRef == null)

3043

{

3044

174

return null;

3045

}

3046

38074

else if (aa.sequenceRef.getDatasetSequence() == null)

{

return null;

}

38074

Vector<PDBEntry> pdbEntries = aa.sequenceRef.getDatasetSequence()

3051

.getAllPDBEntries();

3052

3053

// TODO: this is an incredibly fragile mechanism

3054

38074

for (PDBEntry entry : pdbEntries)

3055

{

3056

3057

46188

String entryProvider = entry.getProvider();

3058

46188

if (entryProvider == null)

3059

{

3060

// No provider - so this is either an old Jalview project, or not

3061

// retrieved from recognised source

3062

46188

entryProvider = "PDB";

3063

}

3064

3065

// Should (re)use a standard mechanism for extracting the PDB ID as it

3066

// is written 1QWXTUV:CHAIN

3067

// Trim the string from first occurrence of colon

3068

46188

String entryID = entry.getId();

3069

46188

int index = entryID.indexOf(':');

3070

3071

// Check if colon exists

3072

46188

if (index != -1)

3073

{

3074

3075

// Trim the string from first occurrence of colon

3076

entryID = entryID.substring(0, index);

}

// TODO: shouldn't need to extract from description what the

3081

// originating ID is for this annotation!

3082

46188

if (entryProvider == "PDB" && aa.description.toLowerCase()

3083

.contains("secondary structure for "

3084

+ entryID.toLowerCase()))

3085

{

3086

3087

38074

return entryProvider;

}

8114

else if (entryProvider != "PDB" && aa.description.toLowerCase()

3092

.contains(entryID.toLowerCase()))

3093

{

3094

3095

return entryProvider;

}

}

}

}

7239

return null;

}

// to do set priority for labels

3108

10821642

public static List<AlignmentAnnotation> getAlignmentAnnotationForSource(

3109

SequenceI seq, String ssSource)

3110

{

3111

3112

10822917

List<AlignmentAnnotation> ssAnnots = new ArrayList<AlignmentAnnotation>();

3113

10882691

for (String ssLabel : Constants.SECONDARY_STRUCTURE_LABELS.keySet())

3114

{

3115

3116

21706654

AlignmentAnnotation[] aa = seq.getAnnotation(ssLabel);

3117

21441485

if (aa != null)

3118

{

3119

3120

101130

if (Constants.SS_ALL_PROVIDERS.equals(ssSource))

3121

{

3122

50571

ssAnnots.addAll(Arrays.asList(aa));

3123

50571

continue;

3124

}

3125

3126

50559

for (AlignmentAnnotation annot : aa)

3127

{

3128

3129

54943

String ssSourceForAnnot = extractSSSourceFromAnnotationDescription(

3130

annot);

3131

54943

if (ssSourceForAnnot != null && ssSource.equals(ssSourceForAnnot))

3132

{

3133

54943

ssAnnots.add(annot);

}

}

}

}

10761489

if (ssAnnots.size() > 0)

3139

{

3140

101127

return ssAnnots;

3141

}

3142

3143

10686191

return null;

}

public static Map<SequenceI, ArrayList<AlignmentAnnotation>> getSequenceAssociatedAlignmentAnnotations(

3148

AlignmentAnnotation[] alignAnnotList, String selectedSSSource)

3149

{

3150

3151

Map<SequenceI, ArrayList<AlignmentAnnotation>> ssAlignmentAnnotationForSequences = new HashMap<SequenceI, ArrayList<AlignmentAnnotation>>();

3152

if (alignAnnotList == null || alignAnnotList.length == 0)

3153

{

3154

return ssAlignmentAnnotationForSequences;

3155

}

3156

3157

for (AlignmentAnnotation aa : alignAnnotList)

3158

{

3159

if (aa.sequenceRef == null)

{

continue;

}

if (isSecondaryStructureFrom(selectedSSSource, aa))

3165

{

3166

ssAlignmentAnnotationForSequences

3167

.computeIfAbsent(aa.sequenceRef.getDatasetSequence(),

3168

k -> new ArrayList<>())

.add(aa);

}

}

return ssAlignmentAnnotationForSequences;

}

/**

* @param selectedSSSource

3180

* @param aa

3181

* @return true if aa is from a provider or all providers as specified by

3182

* selectedSSSource

3183

3184

public static boolean isSecondaryStructureFrom(String selectedSSSource,

3185

AlignmentAnnotation aa)

3186

{

3187

3188

for (String label : Constants.SECONDARY_STRUCTURE_LABELS.keySet())

3189

{

3190

3191

if (label.equals(aa.label))

3192

{

3193

3194

if (selectedSSSource.equals(Constants.SS_ALL_PROVIDERS))

{

return true;

}

String ssSource = AlignmentUtils

3199

.extractSSSourceFromAnnotationDescription(aa);

3200

if (ssSource != null && ssSource.equals(selectedSSSource))

{

return true;

}

}

}

return false;

}

// Method to get the key for a given provider value

3210

public static String getSecondaryStructureProviderKey(String providerValue) {

3211

for (Map.Entry<String, String> entry : Constants.STRUCTURE_PROVIDERS.entrySet()) {

3212

if (entry.getValue().equals(providerValue)) {

3213

return entry.getKey(); // Return the key (abbreviation) for the matching provider value

3214

}

3215

}

3216

return null; // Return null if no match is found

3217

}

3218

3219

public static String reduceLabelLength(String label) {

3220

// Split the input by " | "

3221

String[] parts = label.split(" \\| ");

3222

3223

// Map the full names to their abbreviations

3224

String reducedLabel = Arrays.stream(parts)

3225

.map(fullName -> Constants.STRUCTURE_PROVIDERS.entrySet().stream()

3226

.filter(entry -> entry.getValue().equals(fullName))

3227

.map(Map.Entry::getKey)

3228

.findFirst()

3229

.orElse(fullName)) // Use fullName if no abbreviation is found

3230

.collect(Collectors.joining(" | "));

3231

3232

return reducedLabel; // Return the reduced label if abbreviations were applied

3233

}

3234

3235

public static Color getSecondaryStructureProviderColor(String label) {

3236

3237

//return Constants.STRUCTURE_PROVIDERS_COLOR.getOrDefault(label, Color.BLACK);

3238

Color c = Constants.STRUCTURE_PROVIDERS_COLOR.get(label.trim());

if(c==null)

c = Color.BLACK;

return c;

}

public static void assignSecondaryStructureProviderColor(Map<String, Color> secondaryStructureProviderColorMap,

3246

List<String> labels) {

3247

3248

// Use a Set to track unique labels

3249

Set<String> uniqueLabels = new HashSet<>(labels);

3250

3251

Color[] palette = ColorBrewer.Paired.getColorPalette(uniqueLabels.size());

3252

3253

3254

List<Color> colorList = new ArrayList<>();

3255

Collections.addAll(colorList, palette);

3256

Collections.shuffle(colorList);

3257

int i = 0;

3258

3259

// Loop through each unique label and add it to the map with a color.

3260

for (String label : uniqueLabels) {

3261

// Generate or retrieve a color for the label.

3262

secondaryStructureProviderColorMap.put(label.toUpperCase().trim(), colorList.get(i));

i++;

}

}

}

Coverage Report

File AlignmentUtils.java

Coverage histogram

Code metrics

Classes

Class AlignmentUtils

Class AlignmentUtils.DnaVariant

Contributing tests

Contributing tests

Source view