File AlignmentUtils.java

Branches:

492

Statements:

981

Methods:

64

Classes:

2

LOC:

3,402

NCLOC:

2,092

Total complexity:

377

Complexity density:

0.38

Statements/Method:

15.33

Methods/Class:

32

Average method complexity:

5.89

Classes

Class	Line #	Total Statements	Complexity	TOTAL Coverage	Actions
AlignmentUtils	86	975	371	0.7852922 (78.5%)
AlignmentUtils.DnaVariant	102	6	6	0.00%

Class AlignmentUtils

Class AlignmentUtils	Line # 86	Total Statements 975	Complexity 371	TOTAL Coverage 0.7852922 (78.5%)
expandContext(AlignmentI,int) : AlignmentI expandContext(AlignmentI,int) : AlignmentI	143143	46.046	10.010	0.983871 0.98387198.4%
getSequenceIndex(AlignmentI,SequenceI) : int getSequenceIndex(AlignmentI,SequenceI) : int	249249	8.08	2.02	1.0 1.0100%
getSequencesByName(AlignmentI) : Map<String, List<SequenceI>> getSequencesByName(AlignmentI) : Map<String, List<SequenceI>>	272272	10.010	3.03	0.9285714 0.928571492.9%
mapProteinAlignmentToCdna(AlignmentI,AlignmentI) : boolean mapProteinAlignmentToCdna(AlignmentI,AlignmentI) : boolean	303303	7.07	3.03	0.7777778 0.777777877.8%
mapProteinToCdna(AlignmentI,AlignmentI,Set<SequenceI>,Set<SequenceI>,boolean) : boolean mapProteinToCdna(AlignmentI,AlignmentI,Set<SequenceI>,Set<SequenceI>,boolean) : boolean	345345	22.022	9.09	0.9375 0.937593.8%
mappingExists(List<AlignedCodonFrame>,SequenceI,SequenceI) : boolean mappingExists(List<AlignedCodonFrame>,SequenceI,SequenceI) : boolean	412412	5.05	3.03	0.6666667 0.666666766.7%
mapCdnaToProtein(SequenceI,SequenceI) : MapList mapCdnaToProtein(SequenceI,SequenceI) : MapList	445445	28.028	12.012	0.95238096 0.9523809695.2%
translatesAs(char[],int,char[]) : boolean translatesAs(char[],int,char[]) : boolean	535535	21.021	14.014	1.0 1.0100%
alignSequenceAs(SequenceI,AlignmentI,String,boolean,boolean) : boolean alignSequenceAs(SequenceI,AlignmentI,String,boolean,boolean) : boolean	612612	14.014	5.05	0.0 0.00%
alignSequenceAs(SequenceI,SequenceI,AlignedCodonFrame,String,char,boolean,boolean) : void alignSequenceAs(SequenceI,SequenceI,AlignedCodonFrame,String,char,boolean,boolean) : void	668668	60.060	20.020	1.0 1.0100%
calculateGapsToInsert(boolean,boolean,int,boolean,int,int,boolean) : int calculateGapsToInsert(boolean,boolean,int,boolean,int,int,boolean) : int	833833	15.015	10.010	1.0 1.0100%
alignProteinAsDna(AlignmentI,AlignmentI) : int alignProteinAsDna(AlignmentI,AlignmentI) : int	897897	6.06	3.03	0.625 0.62562.5%
alignCdsAsProtein(AlignmentI,AlignmentI) : int alignCdsAsProtein(AlignmentI,AlignmentI) : int	923923	18.018	5.05	0.8333333 0.833333383.3%
alignCdsSequenceAsProtein(SequenceI,AlignmentI,List<AlignedCodonFrame>,char) : boolean alignCdsSequenceAsProtein(SequenceI,AlignmentI,List<AlignedCodonFrame>,char) : boolean	969969	48.048	16.016	0.67105263 0.6710526367.1%
buildCodonColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : Map<AlignedCodon, Map<SequenceI, AlignedCodon>> buildCodonColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : Map<AlignedCodon, Map<SequenceI, AlignedCodon>>	11001100	13.013	2.02	0.93333334 0.9333333493.3%
addUnmappedPeptideStarts(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,int) : void addUnmappedPeptideStarts(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,int) : void	11561156	23.023	6.06	0.93939394 0.9393939493.9%
alignProteinAs(AlignmentI,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,List<SequenceI>) : int alignProteinAs(AlignmentI,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,List<SequenceI>) : int	12391239	17.017	2.02	1.0 1.0100%
addCodonPositions(SequenceI,SequenceI,char,Mapping,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>) : void addCodonPositions(SequenceI,SequenceI,char,Mapping,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>) : void	13041304	5.05	4.04	1.0 1.0100%
addCodonToMap(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,AlignedCodon,SequenceI) : void addCodonToMap(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,AlignedCodon,SequenceI) : void	13371337	5.05	2.02	1.0 1.0100%
isMappable(AlignmentI,AlignmentI) : boolean isMappable(AlignmentI,AlignmentI) : boolean	13671367	14.014	9.09	1.0 1.0100%
check3diPeptideMapping(AlignmentI,AlignmentI) : boolean check3diPeptideMapping(AlignmentI,AlignmentI) : boolean	14011401	14.014	6.06	1.0 1.0100%
canBuild3diMapping(SequenceI,SequenceI) : boolean canBuild3diMapping(SequenceI,SequenceI) : boolean	14371437	5.05	2.02	1.0 1.0100%
map3diPeptideToProteinAligment(AlignmentI,AlignmentI) : boolean map3diPeptideToProteinAligment(AlignmentI,AlignmentI) : boolean	14531453	6.06	3.03	0.75 0.7575%
mapProteinToTdiAlignment(AlignmentI,AlignmentI,Set<SequenceI>,Set<SequenceI>) : boolean mapProteinToTdiAlignment(AlignmentI,AlignmentI,Set<SequenceI>,Set<SequenceI>) : boolean	14881488	20.020	6.06	0.8333333 0.833333383.3%
isMappable(SequenceI,SequenceI,List<AlignedCodonFrame>) : boolean isMappable(SequenceI,SequenceI,List<AlignedCodonFrame>) : boolean	15421542	8.08	6.06	0.625 0.62562.5%
findAddableReferenceAnnotations(List<SequenceI>,Map<String, String>,Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI) : void findAddableReferenceAnnotations(List<SequenceI>,Map<String, String>,Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI) : void	15891589	24.024	9.09	0.95 0.9595%
addReferenceAnnotations(Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI,SequenceGroup) : void addReferenceAnnotations(Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI,SequenceGroup) : void	16721672	3.03	1.01	1.0 1.0100%
isSSAnnotationPresent(Map<SequenceI, List<AlignmentAnnotation>>) : boolean isSSAnnotationPresent(Map<SequenceI, List<AlignmentAnnotation>>) : boolean	16851685	4.04	2.02	0.0 0.00%
addReferenceAnnotationTo(AlignmentI,SequenceI,AlignmentAnnotation,SequenceGroup) : AlignmentAnnotation addReferenceAnnotationTo(AlignmentI,SequenceI,AlignmentAnnotation,SequenceGroup) : AlignmentAnnotation	17141714	16.016	4.04	0.95454544 0.9545454495.5%
showOrHideSequenceAnnotations(AlignmentI,Collection<String>,List<SequenceI>,boolean,boolean) : void showOrHideSequenceAnnotations(AlignmentI,Collection<String>,List<SequenceI>,boolean,boolean) : void	17691769	6.06	7.07	0.9166667 0.916666791.7%
showOrHideAutoCalculatedAnnotationsForGroup(AlignmentI,String,SequenceGroup,boolean,boolean) : void showOrHideAutoCalculatedAnnotationsForGroup(AlignmentI,String,SequenceGroup,boolean,boolean) : void	18061806	6.06	7.07	0.0 0.00%
getFirstSequenceAnnotationOfType(AlignmentI,int) : AlignmentAnnotation getFirstSequenceAnnotationOfType(AlignmentI,int) : AlignmentAnnotation	18341834	6.06	4.04	0.0 0.00%
haveCrossRef(SequenceI,SequenceI) : boolean haveCrossRef(SequenceI,SequenceI) : boolean	18561856	1.01	1.01	1.0 1.0100%
hasCrossRef(SequenceI,SequenceI) : boolean hasCrossRef(SequenceI,SequenceI) : boolean	18711871	11.011	6.06	1.0 1.0100%
makeCdsAlignment(SequenceI[],AlignmentI,SequenceI[]) : AlignmentI makeCdsAlignment(SequenceI[],AlignmentI,SequenceI[]) : AlignmentI	19121912	66.066	16.016	0.7717391 0.771739177.2%
transferGeneLoci(SequenceI,MapList,SequenceI) : void transferGeneLoci(SequenceI,MapList,SequenceI) : void	21392139	8.08	4.04	0.9285714 0.928571492.9%
findCdsForProtein(List<AlignedCodonFrame>,SequenceI,List<AlignedCodonFrame>,Mapping) : SequenceI findCdsForProtein(List<AlignedCodonFrame>,SequenceI,List<AlignedCodonFrame>,Mapping) : SequenceI	21772177	19.019	11.011	0.9354839 0.935483993.5%
makeCdsSequence(SequenceI,Mapping,AlignmentI) : SequenceI makeCdsSequence(SequenceI,Mapping,AlignmentI) : SequenceI	22642264	32.032	10.010	0.62 0.6262%
propagateDBRefsToCDS(SequenceI,SequenceI,SequenceI,Mapping) : List<DBRefEntry> propagateDBRefsToCDS(SequenceI,SequenceI,SequenceI,Mapping) : List<DBRefEntry>	23572357	28.028	11.011	0.8636364 0.863636486.4%
transferFeatures(SequenceI,SequenceI,MapList,String,String) : int transferFeatures(SequenceI,SequenceI,MapList,String,String) : int	24452445	33.033	12.012	0.8867925 0.886792588.7%
mapCdsToProtein(SequenceI,SequenceI) : MapList mapCdsToProtein(SequenceI,SequenceI) : MapList	25412541	22.022	5.05	1.0 1.0100%
findCdsPositions(SequenceI) : List<int[]> findCdsPositions(SequenceI) : List<int[]>	26042604	20.020	7.07	0.9285714 0.928571492.9%
makeCopyAlignment(SequenceI[],SequenceI[],AlignmentI) : AlignmentI makeCopyAlignment(SequenceI[],SequenceI[],AlignmentI) : AlignmentI	26722672	21.021	9.09	0.0 0.00%
alignAs(AlignmentI,AlignmentI) : int alignAs(AlignmentI,AlignmentI) : int	27272727	26.026	5.05	0.88235295 0.8823529588.2%
alignAsSameSequences(AlignmentI,AlignmentI) : boolean alignAsSameSequences(AlignmentI,AlignmentI) : boolean	28062806	33.033	7.07	0.88372093 0.8837209388.4%
buildMappedColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : SortedMap<Integer, Map<SequenceI, Character>> buildMappedColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : SortedMap<Integer, Map<SequenceI, Character>>	29002900	11.011	3.03	0.93333334 0.9333333493.3%
addMappedPositions(SequenceI,SequenceI,Mapping,Map<Integer, Map<SequenceI, Character>>) : boolean addMappedPositions(SequenceI,SequenceI,Mapping,Map<Integer, Map<SequenceI, Character>>) : boolean	29542954	24.024	11.011	0.85 0.8585%
looksLikeEnsembl(AlignmentI) : boolean looksLikeEnsembl(AlignmentI) : boolean	30293029	5.05	3.03	0.0 0.00%
isSecondaryStructurePresent(AlignmentAnnotation[]) : boolean isSecondaryStructurePresent(AlignmentAnnotation[]) : boolean	30423042	8.08	3.03	0.8333333 0.833333383.3%
getSecondaryStructureAnnotationColour(char) : Color getSecondaryStructureAnnotationColour(char) : Color	30653065	7.07	4.04	0.0 0.00%
findSSAnnotationForGivenSeqposition(AlignmentAnnotation,int) : char findSSAnnotationForGivenSeqposition(AlignmentAnnotation,int) : char	30843084	9.09	5.05	0.8 0.880%
extractSSSourceInAlignmentAnnotation(AlignmentAnnotation[]) : List<String> extractSSSourceInAlignmentAnnotation(AlignmentAnnotation[]) : List<String>	31113111	11.011	4.04	0.8666667 0.866666786.7%
extractSSSourceFromAnnotationDescription(AlignmentAnnotation) : String extractSSSourceFromAnnotationDescription(AlignmentAnnotation) : String	31423142	26.026	15.015	0.65217394 0.6521739465.2%
getAlignmentAnnotationForSource(SequenceI,String) : List<AlignmentAnnotation> getAlignmentAnnotationForSource(SequenceI,String) : List<AlignmentAnnotation>	32443244	14.014	6.06	0.95454544 0.9545454495.5%
getSequenceAssociatedAlignmentAnnotations(AlignmentAnnotation[],String) : Map<SequenceI, ArrayList<AlignmentAnnotation>> getSequenceAssociatedAlignmentAnnotations(AlignmentAnnotation[],String) : Map<SequenceI, ArrayList<AlignmentAnnotation>>	32833283	9.09	5.05	0.0 0.00%
isSecondaryStructureFrom(String,AlignmentAnnotation) : boolean isSecondaryStructureFrom(String,AlignmentAnnotation) : boolean	33203320	8.08	5.05	0.0 0.00%
getSecondaryStructureProviderKey(String) : String getSecondaryStructureProviderKey(String) : String	33463346	4.04	2.02	0.0 0.00%
reduceLabelLength(String) : String reduceLabelLength(String) : String	33553355	3.03	1.01	0.0 0.00%
getSecondaryStructureProviderColor(String) : Color getSecondaryStructureProviderColor(String) : Color	33713371	4.04	2.02	0.0 0.00%
assignSecondaryStructureProviderColor(Map<String, Color>,List<String>) : void assignSecondaryStructureProviderColor(Map<String, Color>,List<String>) : void	33813381	9.09	1.01	0.0 0.00%

Class AlignmentUtils.DnaVariant

Class AlignmentUtils.DnaVariant	Line # 102	Total Statements 6	Complexity 6	TOTAL Coverage 0.00%
DnaVariant(String) DnaVariant(String)	108108	2.02	1.01	0.0 0.00%
DnaVariant(String,SequenceFeature) DnaVariant(String,SequenceFeature)	114114	2.02	1.01	0.0 0.00%
getSource() : String getSource() : String	120120	1.01	2.02	0.0 0.00%
toString() : String toString() : String	128128	1.01	2.02	0.0 0.00%

Contributing tests

This file is covered by 188 tests.

Contributing tests

Test contribution	Test	Result
0.11971373	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignmentjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment	1PASS
0.09564086	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withXrefs	1PASS
0.09564086	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withStartAndStopCodonsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withStartAndStopCodons	1PASS
0.093689	jalview.datamodel.AlignmentTest.testAlignAs_tdi_protjalview.datamodel.AlignmentTest.testAlignAs_tdi_prot	1PASS
0.09303839	jalview.datamodel.AlignmentTest.testAlignAs_prot_tdijalview.datamodel.AlignmentTest.testAlignAs_prot_tdi	1PASS
0.08783344	jalview.analysis.AlignmentUtilsTests.testIsMappablejalview.analysis.AlignmentUtilsTests.testIsMappable	1PASS
0.08523097	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_filterProductsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_filterProducts	1PASS
0.08588159	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_multipleProteinsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_multipleProteins	1PASS
0.08327912	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_noXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_noXrefs	1PASS
0.08132726	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_alternativeTranscriptsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_alternativeTranscripts	1PASS
0.07742355	jalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTestjalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTest	1PASS
0.07742355	jalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTestjalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTest	1PASS
0.07742355	jalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTestjalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTest	1PASS
0.07091737	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_prioritiseXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_prioritiseXrefs	1PASS
0.06961614	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withIntronsjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withIntrons	1PASS
0.067013666	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_noIntronsjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_noIntrons	1PASS
0.06441119	jalview.ext.jmol.JmolViewerTest.testAddStrToSingleSeqViewJMoljalview.ext.jmol.JmolViewerTest.testAddStrToSingleSeqViewJMol	1PASS
0.060507484	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_keepIntronGapsOnlyjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_keepIntronGapsOnly	1PASS
0.061158102	jalview.datamodel.AlignmentTest.testAlignAs_dnaAsDnajalview.datamodel.AlignmentTest.testAlignAs_dnaAsDna	1PASS
0.059856866	jalview.analysis.AlignmentUtilsTests.testAlignAs_alternateTranscriptsUngappedjalview.analysis.AlignmentUtilsTests.testAlignAs_alternateTranscriptsUngapped	1PASS
0.05790501	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withUnmappedProteinjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withUnmappedProtein	1PASS
0.056603774	jalview.ext.jmol.JmolViewerTest.testSingleSeqViewJMoljalview.ext.jmol.JmolViewerTest.testSingleSeqViewJMol	1PASS
0.056603774	jalview.ext.jmol.JmolParserTest.testAlignmentLoaderjalview.ext.jmol.JmolParserTest.testAlignmentLoader	1PASS
0.057254393	jalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna_incompleteStartCodonjalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna_incompleteStartCodon	1PASS
0.05465192	jalview.gui.StructureChooserTest.openStructureFileForSequenceTestjalview.gui.StructureChooserTest.openStructureFileForSequenceTest	1PASS
0.05465192	jalview.gui.StructureChooserTest.openStructureFileForSequenceTestjalview.gui.StructureChooserTest.openStructureFileForSequenceTest	1PASS
0.05465192	jalview.gui.StructureChooserTest.openStructureFileForSequenceTestjalview.gui.StructureChooserTest.openStructureFileForSequenceTest	1PASS
0.0540013	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withTrailingPeptidejalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withTrailingPeptide	1PASS
0.05465192	jalview.gui.StructureChooserTest.openStructureFileForSequenceTestjalview.gui.StructureChooserTest.openStructureFileForSequenceTest	1PASS
0.05139883	jalview.datamodel.AlignmentTest.testAlignAs_proteinAsCdnajalview.datamodel.AlignmentTest.testAlignAs_proteinAsCdna	1PASS
0.052049447	jalview.analysis.AlignmentUtilsTests.testAlignProteinAsDnajalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna	1PASS
0.049446974	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_mappedProteinProteinjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_mappedProteinProtein	1PASS
0.04749512	jalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProteinjalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProtein	1PASS
0.046193883	jalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProtein_singleSequencejalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProtein_singleSequence	1PASS
0.044892646	jalview.ext.jmol.JmolParserTest.testFileParserjalview.ext.jmol.JmolParserTest.testFileParser	1PASS
0.03773585	jalview.analysis.AlignmentUtilsTests.testExpandContextjalview.analysis.AlignmentUtilsTests.testExpandContext	1PASS
0.036434613	jalview.analysis.AlignmentUtilsTests.testMapCdsToProteinjalview.analysis.AlignmentUtilsTests.testMapCdsToProtein	1PASS
0.035783995	jalview.analysis.AlignmentUtilsTests.testAddReferenceContactMapjalview.analysis.AlignmentUtilsTests.testAddReferenceContactMap	1PASS
0.031880286	jalview.analysis.AlignmentUtilsTests.testAddReferenceAnnotationsjalview.analysis.AlignmentUtilsTests.testAddReferenceAnnotations	1PASS
0.030579051	jalview.analysis.AlignmentUtilsTests.testExpandContext_annotationjalview.analysis.AlignmentUtilsTests.testExpandContext_annotation	1PASS
0.031229667	jalview.datamodel.AlignmentTest.testAlignAs_3dijalview.datamodel.AlignmentTest.testAlignAs_3di	1PASS
0.028627196	jalview.analysis.AlignmentUtilsTests.testMapCdnaToProtein_forSubsequencejalview.analysis.AlignmentUtilsTests.testMapCdnaToProtein_forSubsequence	1PASS
0.025374105	jalview.analysis.AlignmentUtilsTests.testTransferFeaturesjalview.analysis.AlignmentUtilsTests.testTransferFeatures	1PASS
0.024723487	jalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesjalview.analysis.AlignmentUtilsTests.testAlignAsSameSequences	1PASS
0.024723487	jalview.analysis.AlignmentUtilsTests.testTranslatesAsjalview.analysis.AlignmentUtilsTests.testTranslatesAs	1PASS
0.02342225	jalview.gui.ColourMenuHelperTest.testAddMenuItems_nucleotidejalview.gui.ColourMenuHelperTest.testAddMenuItems_nucleotide	1PASS
0.02342225	jalview.gui.AlignFrameTest.testNewView_colourThresholdsjalview.gui.AlignFrameTest.testNewView_colourThresholds	1PASS
0.024072869	jalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesMultipleSubSeqjalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesMultipleSubSeq	1PASS
0.022771632	jalview.io.JSONFileTest.testBioJSONRoundTripWithColourSchemeNonejalview.io.JSONFileTest.testBioJSONRoundTripWithColourSchemeNone	1PASS
0.02342225	jalview.analysis.AlignmentUtilsTests.testTransferFeatures_withOmitjalview.analysis.AlignmentUtilsTests.testTransferFeatures_withOmit	1PASS
0.022121014	jalview.gui.AlignViewportTest.testGetSelectionAsNewSequences_withContactMatricesjalview.gui.AlignViewportTest.testGetSelectionAsNewSequences_withContactMatrices	1PASS
0.02081978	jalview.analysis.AlignmentUtilsTests.testAddMappedPositions_withStopCodonjalview.analysis.AlignmentUtilsTests.testAddMappedPositions_withStopCodon	1PASS
0.02081978	jalview.analysis.AlignmentUtilsTests.testAddMappedPositionsjalview.analysis.AlignmentUtilsTests.testAddMappedPositions	1PASS
0.018217307	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_notOnAlignmentjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_notOnAlignment	1PASS
0.018217307	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenujalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu	1PASS
0.018867925	jalview.analysis.AlignmentUtilsTests.testFindCdsForProtein_noUTRjalview.analysis.AlignmentUtilsTests.testFindCdsForProtein_noUTR	1PASS
0.019518543	jalview.analysis.AlignmentUtilsTests.testTransferFeatures_withSelectjalview.analysis.AlignmentUtilsTests.testTransferFeatures_withSelect	1PASS
0.017566688	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_alreadyAddedjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_alreadyAdded	1PASS
0.016265452	jalview.gui.AlignViewportTest.testSetSelectionGroupjalview.gui.AlignViewportTest.testSetSelectionGroup	1PASS
0.016265452	jalview.renderer.seqfeatures.FeatureRendererTest.testFindAllFeaturesjalview.renderer.seqfeatures.FeatureRendererTest.testFindAllFeatures	1PASS
0.016265452	jalview.gui.SeqPanelTest.testFindColumn_and_FindAlignmentColumn_wrappedjalview.gui.SeqPanelTest.testFindColumn_and_FindAlignmentColumn_wrapped	1PASS
0.016265452	jalview.gui.AlignViewportTest.testSetGlobalColourSchemejalview.gui.AlignViewportTest.testSetGlobalColourScheme	1PASS
0.016265452	jalview.datamodel.AlignmentViewTest.testGetVisibleContigsjalview.datamodel.AlignmentViewTest.testGetVisibleContigs	1PASS
0.016265452	jalview.gui.AlignViewportTest.testDeregisterMapping_withReferencejalview.gui.AlignViewportTest.testDeregisterMapping_withReference	1PASS
0.016265452	jalview.schemes.ClustalxColourSchemeTest.testFindColour_ignoreGapsjalview.schemes.ClustalxColourSchemeTest.testFindColour_ignoreGaps	1PASS
0.016265452	jalview.structures.models.AAStructureBindingModelTest.testBuildColoursMapjalview.structures.models.AAStructureBindingModelTest.testBuildColoursMap	1PASS
0.016265452	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.016265452	jalview.gui.AlignmentPanelTest.testSetOverviewTitlejalview.gui.AlignmentPanelTest.testSetOverviewTitle	1PASS
0.016265452	jalview.analysis.AlignmentUtilsTests.testFindCdsForProteinjalview.analysis.AlignmentUtilsTests.testFindCdsForProtein	1PASS
0.016265452	jalview.analysis.AlignmentUtilsTests.testFindCdsPositions_fivePrimeIncompletejalview.analysis.AlignmentUtilsTests.testFindCdsPositions_fivePrimeIncomplete	1PASS
0.016265452	jalview.io.JSONFileTest.testGrpParsed_colourNonejalview.io.JSONFileTest.testGrpParsed_colourNone	1PASS
0.016265452	jalview.renderer.seqfeatures.FeatureRendererTest.testFilterFeaturesForDisplayjalview.renderer.seqfeatures.FeatureRendererTest.testFilterFeaturesForDisplay	1PASS
0.016265452	jalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTestjalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTest	1PASS
0.016265452	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.016265452	jalview.gui.AlignFrameTest.testHideFeatureColumnsjalview.gui.AlignFrameTest.testHideFeatureColumns	1PASS
0.016265452	jalview.gui.FeatureSettingsTest.testSaveLoadjalview.gui.FeatureSettingsTest.testSaveLoad	1PASS
0.016265452	jalview.gui.QuitHandlerTest.testInstantQuitjalview.gui.QuitHandlerTest.testInstantQuit	1PASS
0.016265452	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.016265452	jalview.io.JalviewExportPropertiesTests.testImportExportPeriodGapsjalview.io.JalviewExportPropertiesTests.testImportExportPeriodGaps	1PASS
0.016265452	jalview.gui.SeqPanelTest.testFindMousePosition_wrapped_scaleAbovejalview.gui.SeqPanelTest.testFindMousePosition_wrapped_scaleAbove	1PASS
0.016265452	jalview.gui.ScalePanelTest.testBuildPopupMenujalview.gui.ScalePanelTest.testBuildPopupMenu	1PASS
0.016265452	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.016265452	jalview.gui.AlignViewportTest.testDeregisterMapping_onCloseViewjalview.gui.AlignViewportTest.testDeregisterMapping_onCloseView	1PASS
0.016265452	jalview.gui.AlignViewportTest.testGetConsensusSeqjalview.gui.AlignViewportTest.testGetConsensusSeq	1PASS
0.016265452	jalview.gui.StructureChooserTest.openStructureFileForSequenceTestjalview.gui.StructureChooserTest.openStructureFileForSequenceTest	1PASS
0.016265452	jalview.gui.SeqPanelTest.testFindMousePosition_wrapped_scales_longSequencejalview.gui.SeqPanelTest.testFindMousePosition_wrapped_scales_longSequence	1PASS
0.016265452	jalview.gui.ColourMenuHelperTest.testAddMenuItems_forAlignFramejalview.gui.ColourMenuHelperTest.testAddMenuItems_forAlignFrame	1PASS
0.016265452	jalview.gui.QuitHandlerTest.testWaitForSaveQuitjalview.gui.QuitHandlerTest.testWaitForSaveQuit	1PASS
0.016265452	jalview.gui.AlignViewportTest.testSetGetHasSearchResultsjalview.gui.AlignViewportTest.testSetGetHasSearchResults	1PASS
0.016265452	jalview.renderer.seqfeatures.FeatureRendererTest.testFindComplementFeaturesAtResiduejalview.renderer.seqfeatures.FeatureRendererTest.testFindComplementFeaturesAtResidue	1PASS
0.016265452	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.016265452	jalview.gui.SeqPanelTest.testAmbiguousAminoAcidGetsStatusMessagejalview.gui.SeqPanelTest.testAmbiguousAminoAcidGetsStatusMessage	1PASS
0.016265452	jalview.gui.AlignViewportTest.testUpdateConservation_qualityOnlyjalview.gui.AlignViewportTest.testUpdateConservation_qualityOnly	1PASS
0.016265452	jalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTestjalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTest	1PASS
0.016265452	jalview.schemes.ClustalxColourSchemeTest.testFindColourjalview.schemes.ClustalxColourSchemeTest.testFindColour	1PASS
0.016265452	jalview.gui.PairwiseAlignmentPanelTest.testConstructor_noSelectionGroupjalview.gui.PairwiseAlignmentPanelTest.testConstructor_noSelectionGroup	1PASS
0.016265452	jalview.gui.AlignmentPanelTest.testSetOverviewTitle_automaticOverviewjalview.gui.AlignmentPanelTest.testSetOverviewTitle_automaticOverview	1PASS
0.016265452	jalview.ext.jmol.JmolCommandsTest.testGetColourBySequenceCommands_hiddenColumnsjalview.ext.jmol.JmolCommandsTest.testGetColourBySequenceCommands_hiddenColumns	1PASS
0.016265452	jalview.schemes.ColourSchemesTest.testRegisterColourSchemejalview.schemes.ColourSchemesTest.testRegisterColourScheme	1PASS
0.016265452	jalview.gui.SeqPanelTest.testSetStatusReturnsNearestResiduePositionjalview.gui.SeqPanelTest.testSetStatusReturnsNearestResiduePosition	1PASS
0.016265452	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.016265452	jalview.io.ScoreMatrixFileTest.testParseMatrix_aaIndexBlosum80jalview.io.ScoreMatrixFileTest.testParseMatrix_aaIndexBlosum80	1PASS
0.016265452	jalview.gui.PairwiseAlignmentPanelTest.testConstructor_withSelectionGroupjalview.gui.PairwiseAlignmentPanelTest.testConstructor_withSelectionGroup	1PASS
0.016265452	jalview.gui.QuitHandlerTest.testSavedProjectChangesjalview.gui.QuitHandlerTest.testSavedProjectChanges	1PASS
0.016265452	jalview.controller.AlignViewControllerTest.testFindColumnsWithFeaturejalview.controller.AlignViewControllerTest.testFindColumnsWithFeature	1PASS
0.016265452	jalview.analysis.AverageDistanceEngineTest.testUPGMAEnginejalview.analysis.AverageDistanceEngineTest.testUPGMAEngine	1PASS
0.016265452	jalview.gui.SeqPanelTest.testFindMousePosition_wrapped_annotationsjalview.gui.SeqPanelTest.testFindMousePosition_wrapped_annotations	1PASS
0.016265452	jalview.gui.DesktopTests.testInternalCopyPastejalview.gui.DesktopTests.testInternalCopyPaste	1PASS
0.016265452	jalview.gui.AlignFrameTest.testNewView_dsRefPreservedjalview.gui.AlignFrameTest.testNewView_dsRefPreserved	1PASS
0.016265452	jalview.gui.ScalePanelTest.testPreventNegativeStartColumnjalview.gui.ScalePanelTest.testPreventNegativeStartColumn	1PASS
0.016265452	jalview.gui.QuitHandlerTest.testSavedAlignmentChangesjalview.gui.QuitHandlerTest.testSavedAlignmentChanges	1PASS
0.016265452	jalview.renderer.seqfeatures.FeatureRendererTest.testGetColourjalview.renderer.seqfeatures.FeatureRendererTest.testGetColour	1PASS
0.016265452	jalview.renderer.seqfeatures.FeatureRendererTest.testFindFeaturesAtColumnjalview.renderer.seqfeatures.FeatureRendererTest.testFindFeaturesAtColumn	1PASS
0.016265452	jalview.io.WindowsFileLoadAndSaveTest.loadAndSaveAlignmentjalview.io.WindowsFileLoadAndSaveTest.loadAndSaveAlignment	1PASS
0.016265452	jalview.gui.QuitHandlerTest.testUnsavedChangesjalview.gui.QuitHandlerTest.testUnsavedChanges	1PASS
0.016265452	jalview.gui.PaintRefresherTest.testGetAssociatedPanelsjalview.gui.PaintRefresherTest.testGetAssociatedPanels	1PASS
0.016265452	jalview.gui.SeqPanelTest.testFindColumn_unwrappedjalview.gui.SeqPanelTest.testFindColumn_unwrapped	1PASS
0.016265452	jalview.gui.QuitHandlerTest.testNoGUIUnsavedChangesjalview.gui.QuitHandlerTest.testNoGUIUnsavedChanges	1PASS
0.016265452	jalview.renderer.seqfeatures.FeatureRendererTest.testIsVisiblejalview.renderer.seqfeatures.FeatureRendererTest.testIsVisible	1PASS
0.016265452	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.016265452	jalview.gui.SeqPanelTest.testFindMousePosition_wrapped_noAnnotationsjalview.gui.SeqPanelTest.testFindMousePosition_wrapped_noAnnotations	1PASS
0.016265452	jalview.gui.AlignViewportTest.testShowOrDontShowOccupancyjalview.gui.AlignViewportTest.testShowOrDontShowOccupancy	1PASS
0.016265452	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.016265452	jalview.gui.StructureChooserTest.openStructureFileForSequenceTestjalview.gui.StructureChooserTest.openStructureFileForSequenceTest	1PASS
0.016265452	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.016265452	jalview.schemes.ColourSchemesTest.testGetColourSchemejalview.schemes.ColourSchemesTest.testGetColourScheme	1PASS
0.016265452	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.016265452	jalview.gui.AlignViewportTest.testDeregisterMapping_withNoReferencejalview.gui.AlignViewportTest.testDeregisterMapping_withNoReference	1PASS
0.016265452	jalview.controller.AlignViewControllerTest.testSelectColumnsWithHighlightjalview.controller.AlignViewControllerTest.testSelectColumnsWithHighlight	1PASS
0.016265452	jalview.gui.QuitHandlerTest.testForceQuitjalview.gui.QuitHandlerTest.testForceQuit	1PASS
0.016265452	jalview.renderer.ScaleRendererTest.testCalculateMarksjalview.renderer.ScaleRendererTest.testCalculateMarks	1PASS
0.016265452	jalview.bin.CommandsTest.allLinkedIdsTestjalview.bin.CommandsTest.allLinkedIdsTest	1PASS
0.013012362	jalview.analysis.AlignmentUtilsTests.testHaveCrossRefjalview.analysis.AlignmentUtilsTests.testHaveCrossRef	1PASS
0.013012362	jalview.analysis.AlignmentUtilsTests.testHasCrossRefjalview.analysis.AlignmentUtilsTests.testHasCrossRef	1PASS
0.011711125	jalview.datamodel.PAEContactMatrixTest.testSeqAssociatedPAEMatrixjalview.datamodel.PAEContactMatrixTest.testSeqAssociatedPAEMatrix	1PASS
0.0123617435	jalview.analysis.AlignmentUtilsTests.testFindCdsPositionsjalview.analysis.AlignmentUtilsTests.testFindCdsPositions	1PASS
0.01040989	jalview.renderer.OverviewResColourFinderTest.testGetResidueBoxColour_nonejalview.renderer.OverviewResColourFinderTest.testGetResidueBoxColour_none	1PASS
0.01040989	jalview.gui.ScalePanelTest.testSelectColumns_withHiddenjalview.gui.ScalePanelTest.testSelectColumns_withHidden	1PASS
0.01040989	jalview.util.MappingUtilsTest.testMapSequenceGroup_sharedDatasetjalview.util.MappingUtilsTest.testMapSequenceGroup_sharedDataset	1PASS
0.01040989	jalview.util.MappingUtilsTest.testMapSequenceGroup_regionjalview.util.MappingUtilsTest.testMapSequenceGroup_region	1PASS
0.01040989	jalview.util.MappingUtilsTest.testMapColumnSelection_dnaToProteinjalview.util.MappingUtilsTest.testMapColumnSelection_dnaToProtein	1PASS
0.01040989	jalview.renderer.ResidueColourFinderTest.testGetResidueColour_zappojalview.renderer.ResidueColourFinderTest.testGetResidueColour_zappo	1PASS
0.01040989	jalview.analysis.DnaTest.testReverseCdnajalview.analysis.DnaTest.testReverseCdna	1PASS
0.01040989	jalview.renderer.OverviewResColourFinderTest.testGetResidueBoxColour_zappojalview.renderer.OverviewResColourFinderTest.testGetResidueBoxColour_zappo	1PASS
0.01040989	jalview.analysis.FinderTest.testFindAll_sequenceIdsjalview.analysis.FinderTest.testFindAll_sequenceIds	1PASS
0.01040989	jalview.util.MappingUtilsTest.testMapSequenceGroup_columnsjalview.util.MappingUtilsTest.testMapSequenceGroup_columns	1PASS
0.01040989	jalview.util.MappingUtilsTest.testMapColumnSelection_nulljalview.util.MappingUtilsTest.testMapColumnSelection_null	1PASS
0.01040989	jalview.util.MappingUtilsTest.testMapColumnSelection_proteinToDnajalview.util.MappingUtilsTest.testMapColumnSelection_proteinToDna	1PASS
0.01040989	jalview.renderer.ResidueColourFinderTest.testGetResidueColour_nonejalview.renderer.ResidueColourFinderTest.testGetResidueColour_none	1PASS
0.01040989	jalview.renderer.ResidueColourFinderTest.testGetResidueColour_userdefjalview.renderer.ResidueColourFinderTest.testGetResidueColour_userdef	1PASS
0.01040989	jalview.util.MappingUtilsTest.testMapColumnSelection_hiddenColumnsjalview.util.MappingUtilsTest.testMapColumnSelection_hiddenColumns	1PASS
0.01040989	jalview.datamodel.HiddenSequencesTest.testHideShowSequence_withHiddenRepSequencejalview.datamodel.HiddenSequencesTest.testHideShowSequence_withHiddenRepSequence	1PASS
0.01040989	jalview.util.MappingUtilsTest.testMapSequenceGroup_sequencesjalview.util.MappingUtilsTest.testMapSequenceGroup_sequences	1PASS
0.01040989	jalview.renderer.OverviewResColourFinderTest.testGetResidueBoxColour_userdefjalview.renderer.OverviewResColourFinderTest.testGetResidueBoxColour_userdef	1PASS
0.01040989	jalview.analysis.DnaTest.testTranslateCdna_withUntranslatableCodonsAndHiddenColumnsjalview.analysis.DnaTest.testTranslateCdna_withUntranslatableCodonsAndHiddenColumns	1PASS
0.01040989	jalview.analysis.FinderTest.testFind_inDescriptionjalview.analysis.FinderTest.testFind_inDescription	1PASS
0.009108653	jalview.analysis.AlignmentUtilsTests.testGetSequencesByNamejalview.analysis.AlignmentUtilsTests.testGetSequencesByName	1PASS
0.009108653	jalview.analysis.AlignmentUtilsTests.testTransferGeneLocijalview.analysis.AlignmentUtilsTests.testTransferGeneLoci	1PASS
0.007156799	jalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcFirstjalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcFirst	1PASS
0.007156799	jalview.gui.AnnotationChooserTest.testSelectType_showForSelectedjalview.gui.AnnotationChooserTest.testSelectType_showForSelected	1PASS
0.007156799	jalview.gui.AnnotationChooserTest.testSelectType_hideForSelectedjalview.gui.AnnotationChooserTest.testSelectType_hideForSelected	1PASS
0.007156799	jalview.gui.AnnotationColumnChooserTest.testResetjalview.gui.AnnotationColumnChooserTest.testReset	1PASS
0.007156799	jalview.analysis.AnnotationSorterTest.testSort_timingUnsortedjalview.analysis.AnnotationSorterTest.testSort_timingUnsorted	1PASS
0.007807417	jalview.analysis.AlignmentUtilsTests.testShowOrHideSequenceAnnotationsjalview.analysis.AlignmentUtilsTests.testShowOrHideSequenceAnnotations	1PASS
0.007156799	jalview.gui.AnnotationChooserTest.testResetOriginalStatejalview.gui.AnnotationChooserTest.testResetOriginalState	1PASS
0.007156799	jalview.gui.AnnotationChooserTest.testSelectType_hideForAlljalview.gui.AnnotationChooserTest.testSelectType_hideForAll	1PASS
0.007156799	jalview.gui.AnnotationChooserTest.testIsInActionScope_selectedScopejalview.gui.AnnotationChooserTest.testIsInActionScope_selectedScope	1PASS
0.007156799	jalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcLastjalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcLast	1PASS
0.007156799	jalview.gui.AnnotationChooserTest.testDeselectType_showForSelectedjalview.gui.AnnotationChooserTest.testDeselectType_showForSelected	1PASS
0.007156799	jalview.gui.AnnotationChooserTest.testSelectType_showForAlljalview.gui.AnnotationChooserTest.testSelectType_showForAll	1PASS
0.007156799	jalview.io.AnnotatedPDBFileInputTest.checkPDBSequenceFeaturesjalview.io.AnnotatedPDBFileInputTest.checkPDBSequenceFeatures	1PASS
0.007156799	jalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSourcesjalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSources	1PASS
0.006506181	jalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSourcesjalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSources	1PASS
0.007156799	jalview.analysis.AnnotationSorterTest.testNoSort_autocalcFirstjalview.analysis.AnnotationSorterTest.testNoSort_autocalcFirst	1PASS
0.007156799	jalview.gui.PopupMenuTest.testHideInsertionsjalview.gui.PopupMenuTest.testHideInsertions	1PASS
0.007156799	jalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcLastjalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcLast	1PASS
0.006506181	jalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSourcesjalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSources	1PASS
0.007156799	jalview.gui.AnnotationChooserTest.testDeselectType_hideForAlljalview.gui.AnnotationChooserTest.testDeselectType_hideForAll	1PASS
0.007156799	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noReferenceAnnotationsjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noReferenceAnnotations	1PASS
0.007156799	jalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcFirstjalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcFirst	1PASS
0.007156799	jalview.gui.AnnotationChooserTest.testIsInActionScope_unselectedScopejalview.gui.AnnotationChooserTest.testIsInActionScope_unselectedScope	1PASS
0.007156799	jalview.gui.AlignFrameTest.testChangeColour_background_groupsAndThresholdsjalview.gui.AlignFrameTest.testChangeColour_background_groupsAndThresholds	1PASS
0.007156799	jalview.gui.AnnotationChooserTest.testDeselectType_showForAlljalview.gui.AnnotationChooserTest.testDeselectType_showForAll	1PASS
0.007156799	jalview.analysis.AnnotationSorterTest.testSort_timingSemisortedjalview.analysis.AnnotationSorterTest.testSort_timingSemisorted	1PASS
0.007156799	jalview.analysis.AnnotationSorterTest.testSort_timingPresortedjalview.analysis.AnnotationSorterTest.testSort_timingPresorted	1PASS
0.007156799	jalview.gui.AnnotationChooserTest.testDeselectType_hideForSelectedjalview.gui.AnnotationChooserTest.testDeselectType_hideForSelected	1PASS
0.0058555626	jalview.analysis.GroupingTest.testMakeGroupsWithBothjalview.analysis.GroupingTest.testMakeGroupsWithBoth	1PASS
0.0039037084	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noSequenceSelectedjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noSequenceSelected	1PASS

Source view

/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */

package jalview.analysis;

import java.awt.Color;

import java.util.ArrayList;

import java.util.Arrays;

import java.util.Collection;

import java.util.Collections;

import java.util.HashMap;

import java.util.HashSet;

import java.util.Iterator;

import java.util.LinkedHashMap;

import java.util.List;

import java.util.Locale;

import java.util.Map;

import java.util.Map.Entry;

import java.util.NoSuchElementException;

import java.util.Set;

import java.util.SortedMap;

import java.util.TreeMap;

import java.util.Vector;

import java.util.stream.Collectors;

import org.jcolorbrewer.ColorBrewer;

import jalview.api.AlignCalcWorkerI;

import jalview.bin.Console;

import jalview.commands.RemoveGapColCommand;

import jalview.datamodel.AlignedCodon;

import jalview.datamodel.AlignedCodonFrame;

import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;

import jalview.datamodel.Alignment;

import jalview.datamodel.AlignmentAnnotation;

import jalview.datamodel.AlignmentI;

import jalview.datamodel.Annotation;

import jalview.datamodel.ContactMatrixI;

import jalview.datamodel.DBRefEntry;

import jalview.datamodel.GeneLociI;

import jalview.datamodel.IncompleteCodonException;

import jalview.datamodel.Mapping;

import jalview.datamodel.PDBEntry;

import jalview.datamodel.SeqCigar;

import jalview.datamodel.Sequence;

import jalview.datamodel.SequenceFeature;

import jalview.datamodel.SequenceGroup;

import jalview.datamodel.SequenceI;

import jalview.datamodel.features.SequenceFeatures;

import jalview.gui.AlignmentPanel;

import jalview.io.gff.SequenceOntologyI;

import jalview.schemes.ResidueProperties;

import jalview.util.Comparison;

import jalview.util.Constants;

import jalview.util.DBRefUtils;

import jalview.util.IntRangeComparator;

import jalview.util.MapList;

import jalview.util.MappingUtils;

import jalview.util.MessageManager;

import jalview.workers.SecondaryStructureConsensusThread;

/**
 * A grab bag of useful alignment manipulation operations. Expect these to be
 * refactored elsewhere at some point.
 *
 * @author jimp
 */

public class AlignmentUtils

{

private static final int CODON_LENGTH = 3;

private static final String SEQUENCE_VARIANT = "sequence_variant:";

/*
 * the 'id' attribute is provided for variant features fetched from
 * Ensembl using its REST service with JSON format
 */

public static final String VARIANT_ID = "id";

/**

* A data model to hold the 'normal' base value at a position, and an optional

100

* sequence variant feature

101

102

static final class DnaVariant

{

final String base;

SequenceFeature variant;

107

108

DnaVariant(String nuc)

{

base = nuc;

variant = null;

}

DnaVariant(String nuc, SequenceFeature var)

{

base = nuc;

variant = var;

}

public String getSource()

121

{

122

return variant == null ? null : variant.getFeatureGroup();

}

/**

* toString for aid in the debugger only

127

128

@Override

129

public String toString()

130

{

131

return base + ":" + (variant == null ? "" : variant.getDescription());

}

}

/**

* given an existing alignment, create a new alignment including all, or up to

137

* flankSize additional symbols from each sequence's dataset sequence

* @param core

* @param flankSize

* @return AlignmentI

public static AlignmentI expandContext(AlignmentI core, int flankSize)

144

{

145

List<SequenceI> sq = new ArrayList<>();

146

int maxoffset = 0;

147

for (SequenceI s : core.getSequences())

148

{

149

131

SequenceI newSeq = s.deriveSequence();

150

131

final int newSeqStart = newSeq.getStart() - 1;

151

131

if (newSeqStart > maxoffset

152

&& newSeq.getDatasetSequence().getStart() < s.getStart())

153

{

154

131

maxoffset = newSeqStart;

155

}

156

131

sq.add(newSeq);

}

if (flankSize > -1)

{

maxoffset = Math.min(maxoffset, flankSize);

}

* now add offset left and right to create an expanded alignment

165

166

for (SequenceI s : sq)

167

{

168

131

SequenceI ds = s;

169

262

while (ds.getDatasetSequence() != null)

170

{

171

131

ds = ds.getDatasetSequence();

172

}

173

131

int s_end = s.findPosition(s.getStart() + s.getLength());

174

// find available flanking residues for sequence

175

131

int ustream_ds = s.getStart() - ds.getStart();

176

131

int dstream_ds = ds.getEnd() - s_end;

177

178

// build new flanked sequence

179

180

// compute gap padding to start of flanking sequence

181

131

int offset = maxoffset - ustream_ds;

182

183

// padding is gapChar x ( maxoffset - min(ustream_ds, flank)

184

131

if (flankSize >= 0)

185

{

186

125

if (flankSize < ustream_ds)

187

{

188

// take up to flankSize residues

189

offset = maxoffset - flankSize;

190

ustream_ds = flankSize;

191

}

192

125

if (flankSize <= dstream_ds)

193

{

194

116

dstream_ds = flankSize - 1;

195

}

196

}

197

// TODO use Character.toLowerCase to avoid creating String objects?

198

131

char[] upstream = new String(ds

199

.getSequence(s.getStart() - 1 - ustream_ds, s.getStart() - 1))

200

.toLowerCase(Locale.ROOT).toCharArray();

201

131

char[] downstream = new String(

202

ds.getSequence(s_end - 1, s_end + dstream_ds))

203

.toLowerCase(Locale.ROOT).toCharArray();

204

131

char[] coreseq = s.getSequence();

205

131

char[] nseq = new char[offset + upstream.length + downstream.length

206

+ coreseq.length];

207

131

char c = core.getGapCharacter();

208

209

131

int p = 0;

210

461

for (; p < offset; p++)

211

{

212

330

nseq[p] = c;

213

}

214

215

131

System.arraycopy(upstream, 0, nseq, p, upstream.length);

216

131

System.arraycopy(coreseq, 0, nseq, p + upstream.length,

217

coreseq.length);

218

131

System.arraycopy(downstream, 0, nseq,

219

p + coreseq.length + upstream.length, downstream.length);

220

131

s.setSequence(new String(nseq));

221

131

s.setStart(s.getStart() - ustream_ds);

222

131

s.setEnd(s_end + downstream.length);

223

}

224

AlignmentI newAl = new jalview.datamodel.Alignment(

225

sq.toArray(new SequenceI[0]));

226

for (SequenceI s : sq)

227

{

228

131

if (s.getAnnotation() != null)

229

{

230

for (AlignmentAnnotation aa : s.getAnnotation())

231

{

232

aa.adjustForAlignment(); // JAL-1712 fix

233

newAl.addAnnotation(aa);

}

}

}

newAl.setDataset(core.getDataset());

return newAl;

}

/**

* Returns the index (zero-based position) of a sequence in an alignment, or

* -1 if not found.

* @param al

* @param seq

* @return

56066

public static int getSequenceIndex(AlignmentI al, SequenceI seq)

250

{

251

56066

int result = -1;

252

56066

int pos = 0;

253

56066

for (SequenceI alSeq : al.getSequences())

254

{

255

126110737

if (alSeq == seq)

256

{

257

56066

result = pos;

258

56066

break;

259

}

260

126054671

pos++;

261

}

262

56066

return result;

}

/**

* Returns a map of lists of sequences in the alignment, keyed by sequence

267

* name. For use in mapping between different alignment views of the same

268

* sequences.

269

270

* @see jalview.datamodel.AlignmentI#getSequencesByName()

271

272

public static Map<String, List<SequenceI>> getSequencesByName(

273

AlignmentI al)

274

{

275

Map<String, List<SequenceI>> theMap = new LinkedHashMap<>();

276

for (SequenceI seq : al.getSequences())

277

{

278

String name = seq.getName();

279

if (name != null)

280

{

281

List<SequenceI> seqs = theMap.get(name);

282

if (seqs == null)

283

{

284

seqs = new ArrayList<>();

285

theMap.put(name, seqs);

}

seqs.add(seq);

}

}

return theMap;

}

/**

* Build mapping of protein to cDNA alignment. Mappings are made between

295

* sequences where the cDNA translates to the protein sequence. Any new

296

* mappings are added to the protein alignment. Returns true if any mappings

297

* either already exist or were added, else false.

298

299

* @param proteinAlignment

300

* @param cdnaAlignment

301

* @return

302

303

public static boolean mapProteinAlignmentToCdna(

304

final AlignmentI proteinAlignment, final AlignmentI cdnaAlignment)

305

{

306

if (proteinAlignment == null || cdnaAlignment == null)

{

return false;

}

Set<SequenceI> mappedDna = new HashSet<>();

312

Set<SequenceI> mappedProtein = new HashSet<>();

313

314

315

* First pass - map sequences where cross-references exist. This include

316

* 1-to-many mappings to support, for example, variant cDNA.

317

318

boolean mappingPerformed = mapProteinToCdna(proteinAlignment,

319

cdnaAlignment, mappedDna, mappedProtein, true);

320

321

322

* Second pass - map sequences where no cross-references exist. This only

323

* does 1-to-1 mappings and assumes corresponding sequences are in the same

324

* order in the alignments.

325

326

mappingPerformed |= mapProteinToCdna(proteinAlignment, cdnaAlignment,

327

mappedDna, mappedProtein, false);

328

return mappingPerformed;

}

/**

* Make mappings between compatible sequences (where the cDNA translation

333

* matches the protein).

334

335

* @param proteinAlignment

336

* @param cdnaAlignment

337

* @param mappedDna

338

* a set of mapped DNA sequences (to add to)

339

* @param mappedProtein

340

* a set of mapped Protein sequences (to add to)

341

* @param xrefsOnly

342

* if true, only map sequences where xrefs exist

343

* @return

344

345

protected static boolean mapProteinToCdna(

346

final AlignmentI proteinAlignment, final AlignmentI cdnaAlignment,

347

Set<SequenceI> mappedDna, Set<SequenceI> mappedProtein,

348

boolean xrefsOnly)

349

{

350

boolean mappingExistsOrAdded = false;

351

List<SequenceI> thisSeqs = proteinAlignment.getSequences();

352

for (SequenceI aaSeq : thisSeqs)

353

{

354

boolean proteinMapped = false;

355

AlignedCodonFrame acf = new AlignedCodonFrame();

356

357

for (SequenceI cdnaSeq : cdnaAlignment.getSequences())

358

{

359

360

* Always try to map if sequences have xref to each other; this supports

361

* variant cDNA or alternative splicing for a protein sequence.

362

363

* If no xrefs, try to map progressively, assuming that alignments have

364

* mappable sequences in corresponding order. These are not

365

* many-to-many, as that would risk mixing species with similar cDNA

366

* sequences.

367

368

if (xrefsOnly && !AlignmentUtils.haveCrossRef(aaSeq, cdnaSeq))

{

continue;

}

* Don't map non-xrefd sequences more than once each. This heuristic

375

* allows us to pair up similar sequences in ordered alignments.

376

377

if (!xrefsOnly && (mappedProtein.contains(aaSeq)

378

|| mappedDna.contains(cdnaSeq)))

{

continue;

}

if (mappingExists(proteinAlignment.getCodonFrames(),

383

aaSeq.getDatasetSequence(), cdnaSeq.getDatasetSequence()))

384

{

385

mappingExistsOrAdded = true;

}

else

{

MapList map = mapCdnaToProtein(aaSeq, cdnaSeq);

390

if (map != null)

391

{

392

acf.addMap(cdnaSeq, aaSeq, map);

393

mappingExistsOrAdded = true;

394

proteinMapped = true;

395

mappedDna.add(cdnaSeq);

396

mappedProtein.add(aaSeq);

}

}

}

if (proteinMapped)

{

proteinAlignment.addCodonFrame(acf);

403

}

404

}

405

return mappingExistsOrAdded;

}

/**

* Answers true if the mappings include one between the given (dataset)

410

* sequences.

411

412

protected static boolean mappingExists(List<AlignedCodonFrame> mappings,

413

SequenceI aaSeq, SequenceI cdnaSeq)

414

{

415

if (mappings != null)

416

{

417

for (AlignedCodonFrame acf : mappings)

418

{

419

if (cdnaSeq == acf.getDnaForAaSeq(aaSeq))

{

return true;

}

}

}

return false;

}

/**

* Builds a mapping (if possible) of a cDNA to a protein sequence.

430

* <ul>

431

* <li>first checks if the cdna translates exactly to the protein

432

* sequence</li>

433

* <li>else checks for translation after removing a STOP codon</li>

434

* <li>else checks for translation after removing a START codon</li>

435

* <li>if that fails, inspect CDS features on the cDNA sequence</li>

436

* </ul>

437

* Returns null if no mapping is determined.

438

439

* @param proteinSeq

440

* the aligned protein sequence

441

* @param cdnaSeq

442

* the aligned cdna sequence

443

* @return

444

445

public static MapList mapCdnaToProtein(SequenceI proteinSeq,

SequenceI cdnaSeq)

{

* Here we handle either dataset sequence set (desktop) or absent (applet).

450

* Use only the char[] form of the sequence to avoid creating possibly large

451

* String objects.

452

453

final SequenceI proteinDataset = proteinSeq.getDatasetSequence();

454

char[] aaSeqChars = proteinDataset != null

455

? proteinDataset.getSequence()

456

: proteinSeq.getSequence();

457

final SequenceI cdnaDataset = cdnaSeq.getDatasetSequence();

458

char[] cdnaSeqChars = cdnaDataset != null ? cdnaDataset.getSequence()

459

: cdnaSeq.getSequence();

460

if (aaSeqChars == null || cdnaSeqChars == null)

{

return null;

}

* cdnaStart/End, proteinStartEnd are base 1 (for dataset sequence mapping)

467

468

final int mappedLength = CODON_LENGTH * aaSeqChars.length;

469

int cdnaLength = cdnaSeqChars.length;

470

int cdnaStart = cdnaSeq.getStart();

471

int cdnaEnd = cdnaSeq.getEnd();

472

final int proteinStart = proteinSeq.getStart();

473

final int proteinEnd = proteinSeq.getEnd();

474

475

476

* If lengths don't match, try ignoring stop codon (if present)

477

478

if (cdnaLength != mappedLength && cdnaLength > 2)

479

{

480

String lastCodon = String.valueOf(cdnaSeqChars,

481

cdnaLength - CODON_LENGTH, CODON_LENGTH)

482

.toUpperCase(Locale.ROOT);

483

for (String stop : ResidueProperties.STOP_CODONS)

484

{

485

if (lastCodon.equals(stop))

486

{

487

cdnaEnd -= CODON_LENGTH;

488

cdnaLength -= CODON_LENGTH;

break;

}

}

}

* If lengths still don't match, try ignoring start codon.

496

497

int startOffset = 0;

498

if (cdnaLength != mappedLength && cdnaLength > 2

499

&& String.valueOf(cdnaSeqChars, 0, CODON_LENGTH)

500

.toUpperCase(Locale.ROOT)

501

.equals(ResidueProperties.START))

502

{

503

startOffset += CODON_LENGTH;

504

cdnaStart += CODON_LENGTH;

505

cdnaLength -= CODON_LENGTH;

506

}

507

508

if (translatesAs(cdnaSeqChars, startOffset, aaSeqChars))

509

{

510

511

* protein is translation of dna (+/- start/stop codons)

512

513

MapList map = new MapList(new int[] { cdnaStart, cdnaEnd },

514

new int[]

515

{ proteinStart, proteinEnd }, CODON_LENGTH, 1);

return map;

}

* translation failed - try mapping CDS annotated regions of dna

521

522

return mapCdsToProtein(cdnaSeq, proteinSeq);

}

/**

* Test whether the given cdna sequence, starting at the given offset,

527

* translates to the given amino acid sequence, using the standard translation

528

* table. Designed to fail fast i.e. as soon as a mismatch position is found.

529

530

* @param cdnaSeqChars

* @param cdnaStart

* @param aaSeqChars

* @return

protected static boolean translatesAs(char[] cdnaSeqChars, int cdnaStart,

536

char[] aaSeqChars)

537

{

538

if (cdnaSeqChars == null || aaSeqChars == null)

{

return false;

}

int aaPos = 0;

int dnaPos = cdnaStart;

545

160

for (; dnaPos < cdnaSeqChars.length - 2

546

&& aaPos < aaSeqChars.length; dnaPos += CODON_LENGTH, aaPos++)

547

{

548

129

String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);

549

129

final String translated = ResidueProperties.codonTranslate(codon);

550

551

552

* allow * in protein to match untranslatable in dna

553

554

129

final char aaRes = aaSeqChars[aaPos];

555

129

if ((translated == null || ResidueProperties.STOP.equals(translated))

&& aaRes == '*')

{

continue;

}

125

if (translated == null || !(aaRes == translated.charAt(0)))

561

{

562

// debug

563

// jalview.bin.Console.outPrintln(("Mismatch at " + i + "/" + aaResidue

564

// + ": "

565

// + codon + "(" + translated + ") != " + aaRes));

return false;

}

}

* check we matched all of the protein sequence

572

573

if (aaPos != aaSeqChars.length)

{

return false;

}

* check we matched all of the dna except

580

* for optional trailing STOP codon

581

582

if (dnaPos == cdnaSeqChars.length)

{

return true;

}

if (dnaPos == cdnaSeqChars.length - CODON_LENGTH)

587

{

588

String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);

589

if (ResidueProperties.STOP

590

.equals(ResidueProperties.codonTranslate(codon)))

{

return true;

}

}

return false;

}

/**

* Align sequence 'seq' to match the alignment of a mapped sequence. Note this

600

* currently assumes that we are aligning cDNA to match protein.

601

602

* @param seq

603

* the sequence to be realigned

604

* @param al

605

* the alignment whose sequence alignment is to be 'copied'

606

* @param gap

607

* character string represent a gap in the realigned sequence

608

* @param preserveUnmappedGaps

609

* @param preserveMappedGaps

610

* @return true if the sequence was realigned, false if it could not be

611

612

public static boolean alignSequenceAs(SequenceI seq, AlignmentI al,

613

String gap, boolean preserveMappedGaps,

614

boolean preserveUnmappedGaps)

615

{

616

617

* Get any mappings from the source alignment to the target (dataset)

618

* sequence.

619

620

// TODO there may be one AlignedCodonFrame per dataset sequence, or one with

621

// all mappings. Would it help to constrain this?

622

List<AlignedCodonFrame> mappings = al.getCodonFrame(seq);

623

if (mappings == null || mappings.isEmpty())

{

return false;

}

* Locate the aligned source sequence whose dataset sequence is mapped. We

630

* just take the first match here (as we can't align like more than one

631

* sequence).

632

633

SequenceI alignFrom = null;

634

AlignedCodonFrame mapping = null;

635

for (AlignedCodonFrame mp : mappings)

636

{

637

alignFrom = mp.findAlignedSequence(seq, al);

638

if (alignFrom != null)

{

mapping = mp;

break;

}

}

if (alignFrom == null)

{

return false;

}

alignSequenceAs(seq, alignFrom, mapping, gap, al.getGapCharacter(),

650

preserveMappedGaps, preserveUnmappedGaps);

return true;

}

/**

* Align sequence 'alignTo' the same way as 'alignFrom', using the mapping to

656

* match residues and codons. Flags control whether existing gaps in unmapped

657

* (intron) and mapped (exon) regions are preserved or not. Gaps between

658

* intron and exon are only retained if both flags are set.

* @param alignTo

* @param alignFrom

* @param mapping

* @param myGap

* @param sourceGap

* @param preserveUnmappedGaps

666

* @param preserveMappedGaps

667

668

public static void alignSequenceAs(SequenceI alignTo, SequenceI alignFrom,

669

AlignedCodonFrame mapping, String myGap, char sourceGap,

670

boolean preserveMappedGaps, boolean preserveUnmappedGaps)

671

{

672

// TODO generalise to work for Protein-Protein, dna-dna, dna-protein

673

674

// aligned and dataset sequence positions, all base zero

int thisSeqPos = 0;

int sourceDsPos = 0;

int basesWritten = 0;

679

char myGapChar = myGap.charAt(0);

680

int ratio = myGap.length();

681

682

int fromOffset = alignFrom.getStart() - 1;

683

int toOffset = alignTo.getStart() - 1;

684

int sourceGapMappedLength = 0;

685

boolean inExon = false;

686

final int toLength = alignTo.getLength();

687

final int fromLength = alignFrom.getLength();

688

StringBuilder thisAligned = new StringBuilder(2 * toLength);

689

690

691

* Traverse the 'model' aligned sequence

692

693

109

for (int i = 0; i < fromLength; i++)

694

{

695

char sourceChar = alignFrom.getCharAt(i);

696

if (sourceChar == sourceGap)

697

{

698

sourceGapMappedLength += ratio;

continue;

}

* Found a non-gap character. Locate its mapped region if any.

704

705

sourceDsPos++;

706

// Note mapping positions are base 1, our sequence positions base 0

707

int[] mappedPos = mapping.getMappedRegion(alignTo, alignFrom,

708

sourceDsPos + fromOffset);

709

if (mappedPos == null)

710

{

711

712

* unmapped position; treat like a gap

713

714

sourceGapMappedLength += ratio;

715

// jalview.bin.Console.errPrintln("Can't align: no codon mapping to

716

// residue "

717

// + sourceDsPos + "(" + sourceChar + ")");

// return;

continue;

}

int mappedCodonStart = mappedPos[0]; // position (1...) of codon start

723

int mappedCodonEnd = mappedPos[mappedPos.length - 1]; // codon end pos

724

StringBuilder trailingCopiedGap = new StringBuilder();

725

726

727

* Copy dna sequence up to and including this codon. Optionally, include

728

* gaps before the codon starts (in introns) and/or after the codon starts

729

* (in exons).

730

731

* Note this only works for 'linear' splicing, not reverse or interleaved.

732

* But then 'align dna as protein' doesn't make much sense otherwise.

733

734

int intronLength = 0;

735

278

while (basesWritten + toOffset < mappedCodonEnd

736

&& thisSeqPos < toLength)

737

{

738

242

final char c = alignTo.getCharAt(thisSeqPos++);

739

242

if (c != myGapChar)

740

{

741

142

basesWritten++;

742

142

int sourcePosition = basesWritten + toOffset;

743

142

if (sourcePosition < mappedCodonStart)

744

{

745

746

* Found an unmapped (intron) base. First add in any preceding gaps

747

* (if wanted).

748

749

if (preserveUnmappedGaps && trailingCopiedGap.length() > 0)

750

{

751

thisAligned.append(trailingCopiedGap.toString());

752

intronLength += trailingCopiedGap.length();

753

trailingCopiedGap = new StringBuilder();

}

intronLength++;

inExon = false;

}

else

{

final boolean startOfCodon = sourcePosition == mappedCodonStart;

761

int gapsToAdd = calculateGapsToInsert(preserveMappedGaps,

762

preserveUnmappedGaps, sourceGapMappedLength, inExon,

763

trailingCopiedGap.length(), intronLength, startOfCodon);

764

179

for (int k = 0; k < gapsToAdd; k++)

765

{

766

thisAligned.append(myGapChar);

767

}

768

sourceGapMappedLength = 0;

769

inExon = true;

770

}

771

142

thisAligned.append(c);

772

142

trailingCopiedGap = new StringBuilder();

}

else

{

100

if (inExon && preserveMappedGaps)

777

{

778

trailingCopiedGap.append(myGapChar);

779

}

780

else if (!inExon && preserveUnmappedGaps)

781

{

782

trailingCopiedGap.append(myGapChar);

}

}

}

}

* At end of model aligned sequence. Copy any remaining target sequence, optionally

790

* including (intron) gaps.

791

792

while (thisSeqPos < toLength)

793

{

794

final char c = alignTo.getCharAt(thisSeqPos++);

795

if (c != myGapChar || preserveUnmappedGaps)

796

{

797

thisAligned.append(c);

798

}

799

sourceGapMappedLength--;

}

* finally add gaps to pad for any trailing source gaps or

804

* unmapped characters

805

806

if (preserveUnmappedGaps)

807

{

808

while (sourceGapMappedLength > 0)

809

{

810

thisAligned.append(myGapChar);

811

sourceGapMappedLength--;

}

}

* All done aligning, set the aligned sequence.

817

818

alignTo.setSequence(new String(thisAligned));

}

/**

* Helper method to work out how many gaps to insert when realigning.

823

824

* @param preserveMappedGaps

825

* @param preserveUnmappedGaps

826

* @param sourceGapMappedLength

827

* @param inExon

828

* @param trailingCopiedGap

829

* @param intronLength

830

* @param startOfCodon

831

* @return

832

833

protected static int calculateGapsToInsert(boolean preserveMappedGaps,

834

boolean preserveUnmappedGaps, int sourceGapMappedLength,

835

boolean inExon, int trailingGapLength, int intronLength,

836

final boolean startOfCodon)

{

int gapsToAdd = 0;

if (startOfCodon)

{

* Reached start of codon. Ignore trailing gaps in intron unless we are

843

* preserving gaps in both exon and intron. Ignore them anyway if the

844

* protein alignment introduces a gap at least as large as the intronic

845

* region.

846

847

if (inExon && !preserveMappedGaps)

848

{

849

trailingGapLength = 0;

850

}

851

if (!inExon && !(preserveMappedGaps && preserveUnmappedGaps))

852

{

853

trailingGapLength = 0;

}

if (inExon)

{

gapsToAdd = Math.max(sourceGapMappedLength, trailingGapLength);

}

else

{

if (intronLength + trailingGapLength <= sourceGapMappedLength)

862

{

863

gapsToAdd = sourceGapMappedLength - intronLength;

}

else

{

gapsToAdd = Math.min(

868

intronLength + trailingGapLength - sourceGapMappedLength,

trailingGapLength);

}

}

}

else

{

* second or third base of codon; check for any gaps in dna

877

878

if (!preserveMappedGaps)

879

{

880

trailingGapLength = 0;

881

}

882

gapsToAdd = Math.max(sourceGapMappedLength, trailingGapLength);

}

return gapsToAdd;

}

/**

* Realigns the given protein to match the alignment of the dna, using codon

889

* mappings to translate aligned codon positions to protein residues.

890

891

* @param protein

892

* the alignment whose sequences are realigned by this method

893

* @param dna

894

* the dna alignment whose alignment we are 'copying'

895

* @return the number of sequences that were realigned

896

897

public static int alignProteinAsDna(AlignmentI protein, AlignmentI dna)

898

{

899

if (protein.isNucleotide() || !dna.isNucleotide())

900

{

901

jalview.bin.Console

902

.errPrintln("Wrong alignment type in alignProteinAsDna");

903

return 0;

904

}

905

List<SequenceI> unmappedProtein = new ArrayList<>();

906

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons = buildCodonColumnsMap(

907

protein, dna, unmappedProtein);

908

return alignProteinAs(protein, alignedCodons, unmappedProtein);

}

/**

* Realigns the given dna to match the alignment of the protein, using codon

913

* mappings to translate aligned peptide positions to codons.

914

915

* Always produces a padded CDS alignment.

916

917

* @param dna

918

* the alignment whose sequences are realigned by this method

919

* @param protein

920

* the protein alignment whose alignment we are 'copying'

921

* @return the number of sequences that were realigned

922

923

public static int alignCdsAsProtein(AlignmentI dna, AlignmentI protein)

924

{

925

if (protein.isNucleotide() || !dna.isNucleotide())

926

{

927

jalview.bin.Console

928

.errPrintln("Wrong alignment type in alignProteinAsDna");

929

return 0;

930

}

931

// todo: implement this

932

List<AlignedCodonFrame> mappings = protein.getCodonFrames();

933

int alignedCount = 0;

934

int width = 0; // alignment width for padding CDS

935

for (SequenceI dnaSeq : dna.getSequences())

936

{

937

if (alignCdsSequenceAsProtein(dnaSeq, protein, mappings,

938

dna.getGapCharacter()))

{

alignedCount++;

}

width = Math.max(dnaSeq.getLength(), width);

}

int oldwidth;

int diff;

for (SequenceI dnaSeq : dna.getSequences())

947

{

948

oldwidth = dnaSeq.getLength();

949

diff = width - oldwidth;

950

if (diff > 0)

951

{

952

dnaSeq.insertCharAt(oldwidth, diff, dna.getGapCharacter());

}

}

return alignedCount;

}

/**

* Helper method to align (if possible) the dna sequence to match the

960

* alignment of a mapped protein sequence. This is currently limited to

961

* handling coding sequence only.

* @param cdsSeq

* @param protein

* @param mappings

* @param gapChar

* @return

static boolean alignCdsSequenceAsProtein(SequenceI cdsSeq,

970

AlignmentI protein, List<AlignedCodonFrame> mappings,

971

char gapChar)

972

{

973

SequenceI cdsDss = cdsSeq.getDatasetSequence();

if (cdsDss == null)

{

System.err

.println("alignCdsSequenceAsProtein needs aligned sequence!");

return false;

}

List<AlignedCodonFrame> dnaMappings = MappingUtils

982

.findMappingsForSequence(cdsSeq, mappings);

983

for (AlignedCodonFrame mapping : dnaMappings)

984

{

985

SequenceI peptide = mapping.findAlignedSequence(cdsSeq, protein);

986

if (peptide != null)

987

{

988

final int peptideLength = peptide.getLength();

989

Mapping map = mapping.getMappingBetween(cdsSeq, peptide);

990

if (map != null)

991

{

992

MapList mapList = map.getMap();

993

if (map.getTo() == peptide.getDatasetSequence())

994

{

995

mapList = mapList.getInverse();

996

}

997

final int cdsLength = cdsDss.getLength();

998

int mappedFromLength = MappingUtils

999

.getLength(mapList.getFromRanges());

1000

int mappedToLength = MappingUtils

1001

.getLength(mapList.getToRanges());

1002

boolean addStopCodon = (cdsLength == mappedFromLength

1003

* CODON_LENGTH + CODON_LENGTH)

1004

|| (peptide.getDatasetSequence()

1005

.getLength() == mappedFromLength - 1);

1006

if (cdsLength != mappedToLength && !addStopCodon)

1007

{

1008

jalview.bin.Console.errPrintln(String.format(

1009

"Can't align cds as protein (length mismatch %d/%d): %s",

1010

cdsLength, mappedToLength, cdsSeq.getName()));

}

* pre-fill the aligned cds sequence with gaps

1015

1016

char[] alignedCds = new char[peptideLength * CODON_LENGTH

1017

+ (addStopCodon ? CODON_LENGTH : 0)];

1018

Arrays.fill(alignedCds, gapChar);

1019

1020

1021

* walk over the aligned peptide sequence and insert mapped

1022

* codons for residues in the aligned cds sequence

1023

1024

int copiedBases = 0;

1025

int cdsStart = cdsDss.getStart();

1026

int proteinPos = peptide.getStart() - 1;

1027

int cdsCol = 0;

1028

1029

for (int col = 0; col < peptideLength; col++)

1030

{

1031

char residue = peptide.getCharAt(col);

1032

1033

if (Comparison.isGap(residue))

1034

{

1035

cdsCol += CODON_LENGTH;

}

else

{

proteinPos++;

int[] codon = mapList.locateInTo(proteinPos, proteinPos);

1041

if (codon == null)

1042

{

1043

// e.g. incomplete start codon, X in peptide

1044

cdsCol += CODON_LENGTH;

}

else

{

for (int j = codon[0]; j <= codon[1]; j++)

1049

{

1050

char mappedBase = cdsDss.getCharAt(j - cdsStart);

1051

alignedCds[cdsCol++] = mappedBase;

copiedBases++;

}

}

}

}

* append stop codon if not mapped from protein,

1060

* closing it up to the end of the mapped sequence

1061

1062

if (copiedBases == cdsLength - CODON_LENGTH)

1063

{

1064

for (int i = alignedCds.length - 1; i >= 0; i--)

1065

{

1066

if (!Comparison.isGap(alignedCds[i]))

1067

{

1068

cdsCol = i + 1; // gap just after end of sequence

break;

}

}

for (int i = cdsLength - CODON_LENGTH; i < cdsLength; i++)

1073

{

1074

alignedCds[cdsCol++] = cdsDss.getCharAt(i);

1075

}

1076

}

1077

cdsSeq.setSequence(new String(alignedCds));

return true;

}

}

}

return false;

}

/**

* Builds a map whose key is an aligned codon position (3 alignment column

1087

* numbers base 0), and whose value is a map from protein sequence to each

1088

* protein's peptide residue for that codon. The map generates an ordering of

1089

* the codons, and allows us to read off the peptides at each position in

1090

* order to assemble 'aligned' protein sequences.

1091

1092

* @param protein

1093

* the protein alignment

1094

* @param dna

1095

* the coding dna alignment

1096

* @param unmappedProtein

1097

* any unmapped proteins are added to this list

1098

* @return

1099

1100

protected static Map<AlignedCodon, Map<SequenceI, AlignedCodon>> buildCodonColumnsMap(

1101

AlignmentI protein, AlignmentI dna,

1102

List<SequenceI> unmappedProtein)

1103

{

1104

1105

* maintain a list of any proteins with no mappings - these will be

1106

* rendered 'as is' in the protein alignment as we can't align them

1107

1108

unmappedProtein.addAll(protein.getSequences());

1109

1110

List<AlignedCodonFrame> mappings = protein.getCodonFrames();

1111

1112

1113

* Map will hold, for each aligned codon position e.g. [3, 5, 6], a map of

1114

* {dnaSequence, {proteinSequence, codonProduct}} at that position. The

1115

* comparator keeps the codon positions ordered.

1116

1117

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons = new TreeMap<>(

1118

new CodonComparator());

1119

1120

for (SequenceI dnaSeq : dna.getSequences())

1121

{

1122

for (AlignedCodonFrame mapping : mappings)

1123

{

1124

SequenceI prot = mapping.findAlignedSequence(dnaSeq, protein);

1125

if (prot != null)

1126

{

1127

Mapping seqMap = mapping.getMappingForSequence(dnaSeq);

1128

addCodonPositions(dnaSeq, prot, protein.getGapCharacter(), seqMap,

1129

alignedCodons);

1130

unmappedProtein.remove(prot);

}

}

}

* Finally add any unmapped peptide start residues (e.g. for incomplete

1137

* codons) as if at the codon position before the second residue

1138

1139

// TODO resolve JAL-2022 so this fudge can be removed

1140

int mappedSequenceCount = protein.getHeight() - unmappedProtein.size();

1141

addUnmappedPeptideStarts(alignedCodons, mappedSequenceCount);

1142

1143

return alignedCodons;

}

/**

* Scans for any protein mapped from position 2 (meaning unmapped start

1148

* position e.g. an incomplete codon), and synthesizes a 'codon' for it at the

1149

* preceding position in the alignment

1150

1151

* @param alignedCodons

1152

* the codon-to-peptide map

1153

* @param mappedSequenceCount

1154

* the number of distinct sequences in the map

1155

1156

protected static void addUnmappedPeptideStarts(

1157

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,

1158

int mappedSequenceCount)

1159

{

1160

// TODO delete this ugly hack once JAL-2022 is resolved

1161

// i.e. we can model startPhase > 0 (incomplete start codon)

1162

1163

List<SequenceI> sequencesChecked = new ArrayList<>();

1164

AlignedCodon lastCodon = null;

1165

Map<SequenceI, AlignedCodon> toAdd = new HashMap<>();

1166

1167

for (Entry<AlignedCodon, Map<SequenceI, AlignedCodon>> entry : alignedCodons

1168

.entrySet())

1169

{

1170

for (Entry<SequenceI, AlignedCodon> sequenceCodon : entry.getValue()

1171

.entrySet())

1172

{

1173

SequenceI seq = sequenceCodon.getKey();

1174

if (sequencesChecked.contains(seq))

{

continue;

}

sequencesChecked.add(seq);

1179

AlignedCodon codon = sequenceCodon.getValue();

1180

if (codon.peptideCol > 1)

1181

{

1182

jalview.bin.Console.errPrintln(

1183

"Problem mapping protein with >1 unmapped start positions: "

1184

+ seq.getName());

1185

}

1186

else if (codon.peptideCol == 1)

1187

{

1188

1189

* first position (peptideCol == 0) was unmapped - add it

1190

1191

if (lastCodon != null)

1192

{

1193

AlignedCodon firstPeptide = new AlignedCodon(lastCodon.pos1,

1194

lastCodon.pos2, lastCodon.pos3,

1195

String.valueOf(seq.getCharAt(0)), 0);

1196

toAdd.put(seq, firstPeptide);

}

else

{

* unmapped residue at start of alignment (no prior column) -

1202

* 'insert' at nominal codon [0, 0, 0]

1203

1204

AlignedCodon firstPeptide = new AlignedCodon(0, 0, 0,

1205

String.valueOf(seq.getCharAt(0)), 0);

1206

toAdd.put(seq, firstPeptide);

1207

}

1208

}

1209

if (sequencesChecked.size() == mappedSequenceCount)

1210

{

1211

// no need to check past first mapped position in all sequences

break;

}

}

lastCodon = entry.getKey();

}

* add any new codons safely after iterating over the map

1220

1221

for (Entry<SequenceI, AlignedCodon> startCodon : toAdd.entrySet())

1222

{

1223

addCodonToMap(alignedCodons, startCodon.getValue(),

1224

startCodon.getKey());

}

}

/**

* Update the aligned protein sequences to match the codon alignments given in

* the map.

* @param protein

* @param alignedCodons

1234

* an ordered map of codon positions (columns), with sequence/peptide

1235

* values present in each column

1236

* @param unmappedProtein

1237

* @return

1238

1239

protected static int alignProteinAs(AlignmentI protein,

1240

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,

1241

List<SequenceI> unmappedProtein)

1242

{

1243

1244

* prefill peptide sequences with gaps

1245

1246

int alignedWidth = alignedCodons.size();

1247

char[] gaps = new char[alignedWidth];

1248

Arrays.fill(gaps, protein.getGapCharacter());

1249

Map<SequenceI, char[]> peptides = new HashMap<>();

1250

for (SequenceI seq : protein.getSequences())

1251

{

1252

if (!unmappedProtein.contains(seq))

1253

{

1254

peptides.put(seq, Arrays.copyOf(gaps, gaps.length));

}

}

* Traverse the codons left to right (as defined by CodonComparator)

1260

* and insert peptides in each column where the sequence is mapped.

1261

* This gives a peptide 'alignment' where residues are aligned if their

1262

* corresponding codons occupy the same columns in the cdna alignment.

1263

1264

int column = 0;

1265

for (AlignedCodon codon : alignedCodons.keySet())

1266

{

1267

final Map<SequenceI, AlignedCodon> columnResidues = alignedCodons

1268

.get(codon);

1269

for (Entry<SequenceI, AlignedCodon> entry : columnResidues.entrySet())

1270

{

1271

char residue = entry.getValue().product.charAt(0);

1272

peptides.get(entry.getKey())[column] = residue;

}

column++;

}

* and finally set the constructed sequences

1279

1280

for (Entry<SequenceI, char[]> entry : peptides.entrySet())

1281

{

1282

entry.getKey().setSequence(new String(entry.getValue()));

}

return 0;

}

/**

* Populate the map of aligned codons by traversing the given sequence

1290

* mapping, locating the aligned positions of mapped codons, and adding those

1291

* positions and their translation products to the map.

1292

1293

* @param dna

1294

* the aligned sequence we are mapping from

1295

* @param protein

1296

* the sequence to be aligned to the codons

1297

* @param gapChar

1298

* the gap character in the dna sequence

1299

* @param seqMap

1300

* a mapping to a sequence translation

1301

* @param alignedCodons

1302

* the map we are building up

1303

1304

static void addCodonPositions(SequenceI dna, SequenceI protein,

1305

char gapChar, Mapping seqMap,

1306

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons)

1307

{

1308

Iterator<AlignedCodon> codons = seqMap.getCodonIterator(dna, gapChar);

1309

1310

1311

* add codon positions, and their peptide translations, to the alignment

1312

* map, while remembering the first codon mapped

1313

1314

while (codons.hasNext())

{

try

{

AlignedCodon codon = codons.next();

1319

addCodonToMap(alignedCodons, codon, protein);

1320

} catch (IncompleteCodonException e)

1321

{

1322

// possible incomplete trailing codon - ignore

1323

} catch (NoSuchElementException e)

1324

{

1325

// possibly peptide lacking STOP

}

}

}

/**

* Helper method to add a codon-to-peptide entry to the aligned codons map

1332

1333

* @param alignedCodons

* @param codon

* @param protein

protected static void addCodonToMap(

1338

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,

1339

AlignedCodon codon, SequenceI protein)

1340

{

1341

Map<SequenceI, AlignedCodon> seqProduct = alignedCodons.get(codon);

1342

if (seqProduct == null)

1343

{

1344

seqProduct = new HashMap<>();

1345

alignedCodons.put(codon, seqProduct);

1346

}

1347

seqProduct.put(protein, codon);

}

/**

* Returns true if a cDNA/Protein mapping either exists, or could be made,

1352

* between at least one pair of sequences in the two alignments. Currently,

1353

* the logic is:

1354

* <ul>

1355

* <li>One alignment must be nucleotide, and the other protein</li>

1356

* <li>At least one pair of sequences must be already mapped, or mappable</li>

1357

* <li>Mappable means the nucleotide translation matches the protein

1358

* sequence</li>

1359

* <li>The translation may ignore start and stop codons if present in the

* nucleotide</li>

* </ul>

* @param al1

* @param al2

* @return

public static boolean isMappable(AlignmentI al1, AlignmentI al2)

1368

{

1369

if (al1 == null || al2 == null || al1 == al2)

{

return false;

}

* Require one nucleotide and one protein

1376

1377

if (al1.isNucleotide() == al2.isNucleotide())

1378

{

1379

// or if protein - check if alternate coding

1380

if (al1.isNucleotide())

{

return false;

}

return check3diPeptideMapping(al1,al2);

1385

}

1386

AlignmentI dna = al1.isNucleotide() ? al1 : al2;

1387

AlignmentI protein = dna == al1 ? al2 : al1;

1388

List<AlignedCodonFrame> mappings = protein.getCodonFrames();

1389

for (SequenceI dnaSeq : dna.getSequences())

1390

{

1391

for (SequenceI proteinSeq : protein.getSequences())

1392

{

1393

if (isMappable(dnaSeq, proteinSeq, mappings))

{

return true;

}

}

}

return false;

}

public static boolean check3diPeptideMapping(AlignmentI al1, AlignmentI al2)

1402

{

1403

if (al1.getHeight()!=al2.getHeight())

{ return false;

}

int mappable=0;

for (SequenceI al1seq: al1.getSequences())

1408

{

1409

boolean foundMappable = false;

1410

for (SequenceI al2seq:al2.getSequences())

1411

{

1412

if (canBuild3diMapping(al1seq,al2seq))

1413

{

1414

foundMappable = true;

break;

}

}

if (foundMappable)

{

mappable++;

}

}

if (mappable>0 && mappable >= (al1.getHeight()*9/10))

1424

{

1425

// TODO allow optional if mappable > a few

return true;

}

return false;

}

/**

* exact name, start-end, and identical length non-gap sequences

* @param al1seq

* @param al2seq

* @return

public static boolean canBuild3diMapping(SequenceI al1seq,

1438

SequenceI al2seq)

1439

{

1440

if (!al1seq.getDisplayId(true)

1441

.equalsIgnoreCase(al2seq.getDisplayId(true))) {

return false;

}

String s1 = AlignSeq

.extractGaps(jalview.util.Comparison.GapChars,

1446

al1seq.getSequenceAsString());

1447

String s2 = AlignSeq.extractGaps(

1448

jalview.util.Comparison.GapChars,

1449

al2seq.getSequenceAsString());

1450

return s1.length()==s2.length();

1451

}

1452

1453

public static boolean map3diPeptideToProteinAligment(

1454

AlignmentI proteinAlignment, AlignmentI tdiAlignment)

1455

{

1456

if (proteinAlignment==null || tdiAlignment==null)

{

return false;

}

Set<SequenceI> mappedDna = new HashSet<>();

1461

Set<SequenceI> mappedProtein = new HashSet<>();

1462

1463

1464

* First pass - map sequences where cross-references exist. This include

1465

* 1-to-many mappings to support, for example, variant cDNA.

1466

1467

boolean mappingPerformed = mapProteinToTdiAlignment(proteinAlignment,

1468

tdiAlignment, mappedDna, mappedProtein);

1469

return mappingPerformed;

}

/**

* Make mappings between compatible sequences (ids are identical, length of seqs are identical).

1477

1478

* @param proteinAlignment

1479

* @param tdiAlignment

1480

* @param mappedTdiSeq

1481

* a set of mapped DNA sequences (to add to)

1482

* @param mappedProtein

1483

* a set of mapped Protein sequences (to add to)

1484

* @param xrefsOnly

1485

* if true, only map sequences where xrefs exist

1486

* @return

1487

1488

protected static boolean mapProteinToTdiAlignment(

1489

final AlignmentI proteinAlignment, final AlignmentI tdiAlignment,

1490

Set<SequenceI> mappedTdiSeq, Set<SequenceI> mappedProtein)

1491

{

1492

boolean mappingExistsOrAdded = false;

1493

List<SequenceI> thisSeqs = proteinAlignment.getSequences();

1494

for (SequenceI aaSeq : thisSeqs)

1495

{

1496

aaSeq = aaSeq.getDatasetSequence()!=null ? aaSeq.getDatasetSequence():null;

1497

boolean proteinMapped = false;

1498

AlignedCodonFrame acf = new AlignedCodonFrame();

1499

1500

for (SequenceI cdnaSeq : tdiAlignment.getSequences())

1501

{

1502

// resolve dataset sequences

1503

cdnaSeq = cdnaSeq.getDatasetSequence()!=null ? cdnaSeq.getDatasetSequence():null;

1504

1505

if (mappingExists(proteinAlignment.getCodonFrames(),

1506

aaSeq, cdnaSeq))

1507

{

1508

mappingExistsOrAdded = true;

}

else

{

if (canBuild3diMapping(aaSeq, cdnaSeq))

1514

{

1515

MapList map = new MapList(new int[] { aaSeq.getStart(),aaSeq.getEnd()},new int[] { cdnaSeq.getStart(),cdnaSeq.getEnd()},1,1);

1516

acf.addMap(cdnaSeq, aaSeq, map);

1517

mappingExistsOrAdded = true;

1518

proteinMapped = true;

1519

mappedTdiSeq.add(cdnaSeq);

1520

mappedProtein.add(aaSeq);

}

}

}

if (proteinMapped)

{

proteinAlignment.addCodonFrame(acf);

1527

}

1528

}

1529

return mappingExistsOrAdded;

}

/**

* Returns true if the dna sequence is mapped, or could be mapped, to the

* protein sequence.

* @param dnaSeq

* @param proteinSeq

* @param mappings

* @return

protected static boolean isMappable(SequenceI dnaSeq,

1543

SequenceI proteinSeq, List<AlignedCodonFrame> mappings)

1544

{

1545

if (dnaSeq == null || proteinSeq == null)

{

return false;

}

SequenceI dnaDs = dnaSeq.getDatasetSequence() == null ? dnaSeq

1551

: dnaSeq.getDatasetSequence();

1552

SequenceI proteinDs = proteinSeq.getDatasetSequence() == null

1553

? proteinSeq

1554

: proteinSeq.getDatasetSequence();

1555

1556

for (AlignedCodonFrame mapping : mappings)

1557

{

1558

if (proteinDs == mapping.getAaForDnaSeq(dnaDs))

{

* already mapped

return true;

}

}

* Just try to make a mapping (it is not yet stored), test whether

1569

* successful.

1570

1571

return mapCdnaToProtein(proteinDs, dnaDs) != null;

}

/**

* Finds any reference annotations associated with the sequences in

1576

* sequenceScope, that are not already added to the alignment, and adds them

1577

* to the 'candidates' map. Also populates a lookup table of annotation

1578

* labels, keyed by calcId, for use in constructing tooltips or the like.

1579

1580

* @param sequenceScope

1581

* the sequences to scan for reference annotations

1582

* @param labelForCalcId

1583

* (optional) map to populate with label for calcId

1584

* @param candidates

1585

* map to populate with annotations for sequence

1586

* @param al

1587

* the alignment to check for presence of annotations

1588

1589

public static void findAddableReferenceAnnotations(

1590

List<SequenceI> sequenceScope, Map<String, String> labelForCalcId,

1591

final Map<SequenceI, List<AlignmentAnnotation>> candidates,

1592

AlignmentI al)

1593

{

1594

if (sequenceScope == null)

{

return;

}

* For each sequence in scope, make a list of any annotations on the

1601

* underlying dataset sequence which are not already on the alignment.

1602

1603

* Add to a map of { alignmentSequence, <List of annotations to add> }

1604

1605

for (SequenceI seq : sequenceScope)

1606

{

1607

SequenceI dataset = seq.getDatasetSequence();

if (dataset == null)

{

continue;

}

AlignmentAnnotation[] datasetAnnotations = dataset.getAnnotation();

1613

if (datasetAnnotations == null)

{

continue;

}

final List<AlignmentAnnotation> result = new ArrayList<>();

1618

for (AlignmentAnnotation dsann : datasetAnnotations)

1619

{

1620

1621

* Find matching annotations on the alignment. If none is found, then

1622

* add this annotation to the list of 'addable' annotations for this

1623

* sequence.

1624

1625

final Iterable<AlignmentAnnotation> matchedAlignmentAnnotations = al

1626

.findAnnotations(seq, dsann.getCalcId(), dsann.label);

1627

boolean found = false;

1628

if (matchedAlignmentAnnotations != null)

1629

{

1630

for (AlignmentAnnotation matched : matchedAlignmentAnnotations)

1631

{

1632

if (dsann.description.equals(matched.description))

{

found = true;

break;

}

}

}

if (!found)

{

result.add(dsann);

if (labelForCalcId != null)

1643

{

1644

labelForCalcId.put(dsann.getCalcId(), dsann.label);

}

}

}

* Save any addable annotations for this sequence

1650

1651

if (!result.isEmpty())

1652

{

1653

candidates.put(seq, result);

}

}

}

/**

* Adds annotations to the top of the alignment annotations, in the same order

1660

* as their related sequences. If you already have an annotation and want to

1661

* add it to a sequence in an alignment use {@code addReferenceAnnotationTo}

1662

1663

* @param annotations

1664

* the annotations to add

1665

* @param alignment

1666

* the alignment to add them to

1667

* @param selectionGroup

1668

* current selection group - may be null, if provided then any added

1669

* annotation will be trimmed to just those columns in the selection

1670

* group

1671

1672

public static void addReferenceAnnotations(

1673

Map<SequenceI, List<AlignmentAnnotation>> annotations,

1674

final AlignmentI alignment, final SequenceGroup selectionGroup)

1675

{

1676

for (SequenceI seq : annotations.keySet())

1677

{

1678

for (AlignmentAnnotation ann : annotations.get(seq))

1679

{

1680

addReferenceAnnotationTo(alignment, seq, ann, selectionGroup);

}

}

}

public static boolean isSSAnnotationPresent(

1686

Map<SequenceI, List<AlignmentAnnotation>> annotations)

1687

{

1688

1689

for (SequenceI seq : annotations.keySet())

1690

{

1691

if (isSecondaryStructurePresent(

1692

annotations.get(seq).toArray(new AlignmentAnnotation[0])))

{

return true;

}

}

return false;

}

/**

* Make a copy of a reference annotation {@code ann} and add it to an

1702

* alignment sequence {@code seq} in {@code alignment}, optionally limited to

1703

* the extent of {@code selectionGroup}

* @param alignment

* @param seq

* @param ann

* @param selectionGroup

1709

* current selection group - may be null, if provided then any added

1710

* annotation will be trimmed to just those columns in the selection

1711

* group

1712

* @return annotation added to {@code seq and {@code alignment}

1713

1714

public static AlignmentAnnotation addReferenceAnnotationTo(

1715

final AlignmentI alignment, final SequenceI seq,

1716

final AlignmentAnnotation ann, final SequenceGroup selectionGroup)

1717

{

1718

AlignmentAnnotation copyAnn = new AlignmentAnnotation(ann);

1719

int startRes = 0;

1720

int endRes = ann.annotations.length;

1721

if (selectionGroup != null)

1722

{

1723

startRes = -1 + Math.min(seq.getEnd(), Math.max(seq.getStart(),

1724

seq.findPosition(selectionGroup.getStartRes())));

1725

endRes = -1 + Math.min(seq.getEnd(),

1726

seq.findPosition(selectionGroup.getEndRes()));

1727

1728

}

1729

copyAnn.restrict(startRes, endRes + 0);

1730

1731

1732

* Add to the sequence (sets copyAnn.datasetSequence), unless the

1733

* original annotation is already on the sequence.

1734

1735

if (!seq.hasAnnotation(ann))

1736

{

1737

ContactMatrixI cm = seq.getDatasetSequence().getContactMatrixFor(ann);

1738

if (cm != null)

1739

{

1740

seq.addContactListFor(copyAnn, cm);

1741

}

1742

seq.addAlignmentAnnotation(copyAnn);

1743

}

1744

// adjust for gaps

1745

copyAnn.adjustForAlignment();

1746

// add to the alignment and set visible

1747

alignment.addAnnotation(copyAnn);

1748

copyAnn.visible = true;

return copyAnn;

}

/**

* Set visibility of alignment annotations of specified types (labels), for

1755

* specified sequences. This supports controls like "Show all secondary

1756

* structure", "Hide all Temp factor", etc.

1757

1758

* @al the alignment to scan for annotations

1759

* @param types

1760

* the types (labels) of annotations to be updated

1761

* @param forSequences

1762

* if not null, only annotations linked to one of these sequences are

1763

* in scope for update; if null, acts on all sequence annotations

1764

* @param anyType

1765

* if this flag is true, 'types' is ignored (label not checked)

1766

* @param doShow

1767

* if true, set visibility on, else set off

1768

1769

public static void showOrHideSequenceAnnotations(AlignmentI al,

1770

Collection<String> types, List<SequenceI> forSequences,

1771

boolean anyType, boolean doShow)

1772

{

1773

AlignmentAnnotation[] anns = al.getAlignmentAnnotation();

1774

if (anns != null)

1775

{

1776

for (AlignmentAnnotation aa : anns)

1777

{

1778

if (anyType || types.contains(aa.label))

1779

{

1780

if ((aa.sequenceRef != null) && (forSequences == null

1781

|| forSequences.contains(aa.sequenceRef)))

{

aa.visible = doShow;

}

}

}

}

}

/**

* Shows or hides auto calculated annotations for a sequence group.

1792

1793

* @param al

1794

* The alignment object with the annotations.

1795

* @param type

1796

* The type of annotation to show or hide.

1797

* @param selectedGroup

1798

* The sequence group for which the annotations should be shown or

1799

* hidden.

1800

* @param anyType

1801

* If true, all types of annotations will be shown/hidden.

1802

* @param doShow

1803

* If true, the annotations will be shown; if false, annotations will

1804

* be hidden.

1805

1806

public static void showOrHideAutoCalculatedAnnotationsForGroup(

1807

AlignmentI al, String type, SequenceGroup selectedGroup,

1808

boolean anyType, boolean doShow)

1809

{

1810

// Get all alignment annotations

1811

AlignmentAnnotation[] anns = al.getAlignmentAnnotation();

if (anns != null)

{

for (AlignmentAnnotation aa : anns)

1816

{

1817

// Check if anyType is true or if the annotation's label contains the

1818

// specified type (currently for secondary structure consensus)

1819

if ((anyType && aa.label

1820

.startsWith(Constants.SECONDARY_STRUCTURE_CONSENSUS_LABEL))

1821

|| aa.label.startsWith(type))

1822

{

1823

// If the annotation's group reference is not null and matches the

1824

// selected group, update its visibility.

1825

if (aa.groupRef != null && selectedGroup == aa.groupRef)

{

aa.visible = doShow;

}

}

}

}

}

public static AlignmentAnnotation getFirstSequenceAnnotationOfType(

1835

AlignmentI al, int graphType)

1836

{

1837

AlignmentAnnotation[] anns = al.getAlignmentAnnotation();

1838

if (anns != null)

1839

{

1840

for (AlignmentAnnotation aa : anns)

1841

{

1842

if (aa.sequenceRef != null && aa.graph == graphType)

return aa;

}

}

return null;

}

/**

* Returns true if either sequence has a cross-reference to the other

* @param seq1

* @param seq2

* @return

public static boolean haveCrossRef(SequenceI seq1, SequenceI seq2)

1857

{

1858

// Note: moved here from class CrossRef as the latter class has dependencies

1859

// not availability to the applet's classpath

1860

return hasCrossRef(seq1, seq2) || hasCrossRef(seq2, seq1);

}

/**

* Returns true if seq1 has a cross-reference to seq2. Currently this assumes

1865

* that sequence name is structured as Source|AccessionId.

* @param seq1

* @param seq2

* @return

108

public static boolean hasCrossRef(SequenceI seq1, SequenceI seq2)

1872

{

1873

108

if (seq1 == null || seq2 == null)

{

return false;

}

100

String name = seq2.getName();

1878

100

final List<DBRefEntry> xrefs = seq1.getDBRefs();

1879

100

if (xrefs != null)

1880

{

1881

for (int ix = 0, nx = xrefs.size(); ix < nx; ix++)

1882

{

1883

DBRefEntry xref = xrefs.get(ix);

1884

String xrefName = xref.getSource() + "|" + xref.getAccessionId();

1885

// case-insensitive test, consistent with DBRefEntry.equalRef()

1886

if (xrefName.equalsIgnoreCase(name))

{

return true;

}

}

}

return false;

}

/**

* Constructs an alignment consisting of the mapped (CDS) regions in the given

1897

* nucleotide sequences, and updates mappings to match. The CDS sequences are

1898

* added to the original alignment's dataset, which is shared by the new

1899

* alignment. Mappings from nucleotide to CDS, and from CDS to protein, are

1900

* added to the alignment dataset.

1901

1902

* @param dna

1903

* aligned nucleotide (dna or cds) sequences

1904

* @param dataset

1905

* the alignment dataset the sequences belong to

1906

* @param products

1907

* (optional) to restrict results to CDS that map to specified

1908

* protein products

1909

* @return an alignment whose sequences are the cds-only parts of the dna

1910

* sequences (or null if no mappings are found)

1911

1912

public static AlignmentI makeCdsAlignment(SequenceI[] dna,

1913

AlignmentI dataset, SequenceI[] products)

1914

{

1915

if (dataset == null || dataset.getDataset() != null)

1916

{

1917

throw new IllegalArgumentException(

1918

"IMPLEMENTATION ERROR: dataset.getDataset() must be null!");

1919

}

1920

List<SequenceI> foundSeqs = new ArrayList<>();

1921

List<SequenceI> cdsSeqs = new ArrayList<>();

1922

List<AlignedCodonFrame> mappings = dataset.getCodonFrames();

1923

HashSet<SequenceI> productSeqs = null;

1924

if (products != null)

1925

{

1926

productSeqs = new HashSet<>();

1927

for (SequenceI seq : products)

1928

{

1929

productSeqs.add(seq.getDatasetSequence() == null ? seq

1930

: seq.getDatasetSequence());

}

}

* Construct CDS sequences from mappings on the alignment dataset.

1936

* The logic is:

1937

* - find the protein product(s) mapped to from each dna sequence

1938

* - if the mapping covers the whole dna sequence (give or take start/stop

1939

* codon), take the dna as the CDS sequence

1940

* - else search dataset mappings for a suitable dna sequence, i.e. one

1941

* whose whole sequence is mapped to the protein

1942

* - if no sequence found, construct one from the dna sequence and mapping

1943

* (and add it to dataset so it is found if this is repeated)

1944

1945

for (SequenceI dnaSeq : dna)

1946

{

1947

SequenceI dnaDss = dnaSeq.getDatasetSequence() == null ? dnaSeq

1948

: dnaSeq.getDatasetSequence();

1949

1950

List<AlignedCodonFrame> seqMappings = MappingUtils

1951

.findMappingsForSequence(dnaSeq, mappings);

1952

for (AlignedCodonFrame mapping : seqMappings)

1953

{

1954

List<Mapping> mappingsFromSequence = mapping

1955

.getMappingsFromSequence(dnaSeq);

1956

1957

for (Mapping aMapping : mappingsFromSequence)

1958

{

1959

MapList mapList = aMapping.getMap();

1960

if (mapList.getFromRatio() == 1)

1961

{

1962

1963

* not a dna-to-protein mapping (likely dna-to-cds)

continue;

}

* skip if mapping is not to one of the target set of proteins

1970

1971

SequenceI proteinProduct = aMapping.getTo();

1972

if (productSeqs != null && !productSeqs.contains(proteinProduct))

{

continue;

}

* try to locate the CDS from the dataset mappings;

1979

* guard against duplicate results (for the case that protein has

1980

* dbrefs to both dna and cds sequences)

1981

1982

SequenceI cdsSeq = findCdsForProtein(mappings, dnaSeq,

1983

seqMappings, aMapping);

1984

if (cdsSeq != null)

1985

{

1986

if (!foundSeqs.contains(cdsSeq))

1987

{

1988

foundSeqs.add(cdsSeq);

1989

SequenceI derivedSequence = cdsSeq.deriveSequence();

1990

cdsSeqs.add(derivedSequence);

1991

if (!dataset.getSequences().contains(cdsSeq))

1992

{

1993

dataset.addSequence(cdsSeq);

}

}

continue;

}

* didn't find mapped CDS sequence - construct it and add

2001

* its dataset sequence to the dataset

2002

2003

cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping,

2004

dataset).deriveSequence();

2005

// cdsSeq has a name constructed as CDS|<dbref>

2006

// <dbref> will be either the accession for the coding sequence,

2007

// marked in the /via/ dbref to the protein product accession

2008

// or it will be the original nucleotide accession.

2009

SequenceI cdsSeqDss = cdsSeq.getDatasetSequence();

cdsSeqs.add(cdsSeq);

* build the mapping from CDS to protein

2015

2016

List<int[]> cdsRange = Collections

2017

.singletonList(new int[]

2018

{ cdsSeq.getStart(),

2019

cdsSeq.getLength() + cdsSeq.getStart() - 1 });

2020

MapList cdsToProteinMap = new MapList(cdsRange,

2021

mapList.getToRanges(), mapList.getFromRatio(),

2022

mapList.getToRatio());

2023

2024

if (!dataset.getSequences().contains(cdsSeqDss))

2025

{

2026

2027

* if this sequence is a newly created one, add it to the dataset

2028

* and made a CDS to protein mapping (if sequence already exists,

2029

* CDS-to-protein mapping _is_ the transcript-to-protein mapping)

2030

2031

dataset.addSequence(cdsSeqDss);

2032

AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();

2033

cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,

cdsToProteinMap);

* guard against duplicating the mapping if repeating this action

2038

2039

if (!mappings.contains(cdsToProteinMapping))

2040

{

2041

mappings.add(cdsToProteinMapping);

}

}

propagateDBRefsToCDS(cdsSeqDss, dnaSeq.getDatasetSequence(),

2046

proteinProduct, aMapping);

2047

2048

* add another mapping from original 'from' range to CDS

2049

2050

AlignedCodonFrame dnaToCdsMapping = new AlignedCodonFrame();

2051

final MapList dnaToCdsMap = new MapList(mapList.getFromRanges(),

2052

cdsRange, 1, 1);

2053

dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeqDss,

2054

dnaToCdsMap);

2055

if (!mappings.contains(dnaToCdsMapping))

2056

{

2057

mappings.add(dnaToCdsMapping);

}

* transfer dna chromosomal loci (if known) to the CDS

2062

* sequence (via the mapping)

2063

2064

final MapList cdsToDnaMap = dnaToCdsMap.getInverse();

2065

transferGeneLoci(dnaSeq, cdsToDnaMap, cdsSeq);

2066

2067

2068

* add DBRef with mapping from protein to CDS

2069

* (this enables Get Cross-References from protein alignment)

2070

* This is tricky because we can't have two DBRefs with the

2071

* same source and accession, so need a different accession for

2072

* the CDS from the dna sequence

2073

2074

2075

// specific use case:

2076

// Genomic contig ENSCHR:1, contains coding regions for ENSG01,

2077

// ENSG02, ENSG03, with transcripts and products similarly named.

2078

// cannot add distinct dbrefs mapping location on ENSCHR:1 to ENSG01

2079

2080

// JBPNote: ?? can't actually create an example that demonstrates we

2081

// need to

2082

// synthesize an xref.

2083

2084

List<DBRefEntry> primrefs = dnaDss.getPrimaryDBRefs();

2085

for (int ip = 0, np = primrefs.size(); ip < np; ip++)

2086

{

2087

DBRefEntry primRef = primrefs.get(ip);

2088

2089

* create a cross-reference from CDS to the source sequence's

2090

* primary reference and vice versa

2091

2092

String source = primRef.getSource();

2093

String version = primRef.getVersion();

2094

DBRefEntry cdsCrossRef = new DBRefEntry(source,

2095

source + ":" + version, primRef.getAccessionId());

2096

cdsCrossRef

2097

.setMap(new Mapping(dnaDss, new MapList(cdsToDnaMap)));

2098

cdsSeqDss.addDBRef(cdsCrossRef);

2099

2100

dnaSeq.addDBRef(new DBRefEntry(source, version,

2101

cdsSeq.getName(), new Mapping(cdsSeqDss, dnaToCdsMap)));

2102

// problem here is that the cross-reference is synthesized -

2103

// cdsSeq.getName() may be like 'CDS|dnaaccession' or

2104

// 'CDS|emblcdsacc'

2105

// assuming cds version same as dna ?!?

2106

2107

DBRefEntry proteinToCdsRef = new DBRefEntry(source, version,

2108

cdsSeq.getName());

2109

2110

proteinToCdsRef.setMap(

2111

new Mapping(cdsSeqDss, cdsToProteinMap.getInverse()));

2112

proteinProduct.addDBRef(proteinToCdsRef);

2113

}

2114

2115

* transfer any features on dna that overlap the CDS

2116

2117

transferFeatures(dnaSeq, cdsSeq, dnaToCdsMap, null,

2118

SequenceOntologyI.CDS);

}

}

}

AlignmentI cds = new Alignment(

2124

cdsSeqs.toArray(new SequenceI[cdsSeqs.size()]));

2125

cds.setDataset(dataset);

return cds;

}

/**

* Tries to transfer gene loci (dbref to chromosome positions) from fromSeq to

2132

* toSeq, mediated by the given mapping between the sequences

2133

2134

* @param fromSeq

2135

* @param targetToFrom

* Map

* @param targetSeq

protected static void transferGeneLoci(SequenceI fromSeq,

2140

MapList targetToFrom, SequenceI targetSeq)

2141

{

2142

if (targetSeq.getGeneLoci() != null)

2143

{

2144

// already have - don't override

2145

return;

2146

}

2147

GeneLociI fromLoci = fromSeq.getGeneLoci();

2148

if (fromLoci == null)

{

return;

}

MapList newMap = targetToFrom.traverse(fromLoci.getMapping());

if (newMap != null)

{

targetSeq.setGeneLoci(fromLoci.getSpeciesId(),

2158

fromLoci.getAssemblyId(), fromLoci.getChromosomeId(), newMap);

}

}

/**

* A helper method that finds a CDS sequence in the alignment dataset that is

2164

* mapped to the given protein sequence, and either is, or has a mapping from,

2165

* the given dna sequence.

2166

2167

* @param mappings

2168

* set of all mappings on the dataset

2169

* @param dnaSeq

2170

* a dna (or cds) sequence we are searching from

2171

* @param seqMappings

2172

* the set of mappings involving dnaSeq

2173

* @param aMapping

2174

* a transcript-to-peptide mapping

2175

* @return

2176

2177

static SequenceI findCdsForProtein(List<AlignedCodonFrame> mappings,

2178

SequenceI dnaSeq, List<AlignedCodonFrame> seqMappings,

Mapping aMapping)

{

* TODO a better dna-cds-protein mapping data representation to allow easy

2183

* navigation; until then this clunky looping around lists of mappings

2184

2185

SequenceI seqDss = dnaSeq.getDatasetSequence() == null ? dnaSeq

2186

: dnaSeq.getDatasetSequence();

2187

SequenceI proteinProduct = aMapping.getTo();

2188

2189

2190

* is this mapping from the whole dna sequence (i.e. CDS)?

2191

* allowing for possible stop codon on dna but not peptide

2192

2193

int mappedFromLength = MappingUtils

2194

.getLength(aMapping.getMap().getFromRanges());

2195

int dnaLength = seqDss.getLength();

2196

if (mappedFromLength == dnaLength

2197

|| mappedFromLength == dnaLength - CODON_LENGTH)

2198

{

2199

2200

* if sequence has CDS features, this is a transcript with no UTR

2201

* - do not take this as the CDS sequence! (JAL-2789)

2202

2203

if (seqDss.getFeatures().getFeaturesByOntology(SequenceOntologyI.CDS)

.isEmpty())

{

return seqDss;

}

}

* looks like we found the dna-to-protein mapping; search for the

2212

* corresponding cds-to-protein mapping

2213

2214

List<AlignedCodonFrame> mappingsToPeptide = MappingUtils

2215

.findMappingsForSequence(proteinProduct, mappings);

2216

for (AlignedCodonFrame acf : mappingsToPeptide)

2217

{

2218

for (SequenceToSequenceMapping map : acf.getMappings())

2219

{

2220

Mapping mapping = map.getMapping();

2221

if (mapping != aMapping

2222

&& mapping.getMap().getFromRatio() == CODON_LENGTH

2223

&& proteinProduct == mapping.getTo()

2224

&& seqDss != map.getFromSeq())

2225

{

2226

mappedFromLength = MappingUtils

2227

.getLength(mapping.getMap().getFromRanges());

2228

if (mappedFromLength == map.getFromSeq().getLength())

2229

{

2230

2231

* found a 3:1 mapping to the protein product which covers

2232

* the whole dna sequence i.e. is from CDS; finally check the CDS

2233

* is mapped from the given dna start sequence

2234

2235

SequenceI cdsSeq = map.getFromSeq();

2236

// todo this test is weak if seqMappings contains multiple mappings;

2237

// we get away with it if transcript:cds relationship is 1:1

2238

List<AlignedCodonFrame> dnaToCdsMaps = MappingUtils

2239

.findMappingsForSequence(cdsSeq, seqMappings);

2240

if (!dnaToCdsMaps.isEmpty())

{

return cdsSeq;

}

}

}

}

}

return null;

}

/**

* Helper method that makes a CDS sequence as defined by the mappings from the

2253

* given sequence i.e. extracts the 'mapped from' ranges (which may be on

2254

* forward or reverse strand).

* @param seq

* @param mapping

* @param dataset

* - existing dataset. We check for sequences that look like the CDS

2260

* we are about to construct, if one exists already, then we will

2261

* just return that one.

2262

* @return CDS sequence (as a dataset sequence)

2263

2264

static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping,

AlignmentI dataset)

{

* construct CDS sequence name as "CDS|" with 'from id' held in the mapping

2269

* if set (e.g. EMBL protein_id), else sequence name appended

2270

2271

String mapFromId = mapping.getMappedFromId();

2272

final String seqId = "CDS|"

2273

+ (mapFromId != null ? mapFromId : seq.getName());

2274

2275

SequenceI newSeq = null;

2276

2277

2278

* construct CDS sequence by splicing mapped from ranges

2279

2280

char[] seqChars = seq.getSequence();

2281

List<int[]> fromRanges = mapping.getMap().getFromRanges();

2282

int cdsWidth = MappingUtils.getLength(fromRanges);

2283

char[] newSeqChars = new char[cdsWidth];

2284

2285

int newPos = 0;

2286

for (int[] range : fromRanges)

2287

{

2288

if (range[0] <= range[1])

2289

{

2290

// forward strand mapping - just copy the range

2291

int length = range[1] - range[0] + 1;

2292

System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos,

length);

newPos += length;

}

else

{

// reverse strand mapping - copy and complement one by one

2299

for (int i = range[0]; i >= range[1]; i--)

2300

{

2301

newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]);

}

}

newSeq = new Sequence(seqId, newSeqChars, 1, newPos);

}

if (dataset != null)

{

SequenceI[] matches = dataset.findSequenceMatch(newSeq.getName());

2311

if (matches != null)

2312

{

2313

boolean matched = false;

2314

for (SequenceI mtch : matches)

2315

{

2316

if (mtch.getStart() != newSeq.getStart())

{

continue;

}

if (mtch.getEnd() != newSeq.getEnd())

{

continue;

}

if (!Arrays.equals(mtch.getSequence(), newSeq.getSequence()))

{

continue;

}

if (!matched)

{

matched = true;

newSeq = mtch;

}

else

{

Console.error(

"JAL-2154 regression: warning - found (and ignored) a duplicate CDS sequence:"

+ mtch.toString());

}

}

}

}

// newSeq.setDescription(mapFromId);

return newSeq;

}

/**

* Adds any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to

* the given mapping.

* @param cdsSeq

* @param contig

* @param proteinProduct

2354

* @param mapping

2355

* @return list of DBRefEntrys added

2356

2357

protected static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,

2358

SequenceI contig, SequenceI proteinProduct, Mapping mapping)

2359

{

2360

2361

// gather direct refs from contig congruent with mapping

2362

List<DBRefEntry> direct = new ArrayList<>();

2363

HashSet<String> directSources = new HashSet<>();

2364

2365

List<DBRefEntry> refs = contig.getDBRefs();

2366

if (refs != null)

2367

{

2368

for (int ib = 0, nb = refs.size(); ib < nb; ib++)

2369

{

2370

DBRefEntry dbr = refs.get(ib);

2371

MapList map;

2372

if (dbr.hasMap() && (map = dbr.getMap().getMap()).isTripletMap())

2373

{

2374

// check if map is the CDS mapping

2375

if (mapping.getMap().equals(map))

2376

{

2377

direct.add(dbr);

2378

directSources.add(dbr.getSource());

}

}

}

}

List<DBRefEntry> onSource = DBRefUtils.selectRefs(

2384

proteinProduct.getDBRefs(),

2385

directSources.toArray(new String[0]));

2386

List<DBRefEntry> propagated = new ArrayList<>();

2387

2388

// and generate appropriate mappings

2389

for (int ic = 0, nc = direct.size(); ic < nc; ic++)

2390

{

2391

DBRefEntry cdsref = direct.get(ic);

2392

Mapping m = cdsref.getMap();

2393

// clone maplist and mapping

2394

MapList cdsposmap = new MapList(

2395

Arrays.asList(new int[][]

2396

{ new int[] { cdsSeq.getStart(), cdsSeq.getEnd() } }),

2397

m.getMap().getToRanges(), 3, 1);

2398

Mapping cdsmap = new Mapping(m.getTo(), m.getMap());

2399

2400

// create dbref

2401

DBRefEntry newref = new DBRefEntry(cdsref.getSource(),

2402

cdsref.getVersion(), cdsref.getAccessionId(),

2403

new Mapping(cdsmap.getTo(), cdsposmap));

2404

2405

// and see if we can map to the protein product for this mapping.

2406

// onSource is the filtered set of accessions on protein that we are

2407

// tranferring, so we assume accession is the same.

2408

if (cdsmap.getTo() == null && onSource != null)

2409

{

2410

List<DBRefEntry> sourceRefs = DBRefUtils.searchRefs(onSource,

2411

cdsref.getAccessionId());

2412

if (sourceRefs != null)

2413

{

2414

for (DBRefEntry srcref : sourceRefs)

2415

{

2416

if (srcref.getSource().equalsIgnoreCase(cdsref.getSource()))

2417

{

2418

// we have found a complementary dbref on the protein product, so

2419

// update mapping's getTo

2420

newref.getMap().setTo(proteinProduct);

}

}

}

}

cdsSeq.addDBRef(newref);

2426

propagated.add(newref);

}

return propagated;

}

/**

* Transfers co-located features on 'fromSeq' to 'toSeq', adjusting the

2433

* feature start/end ranges, optionally omitting specified feature types.

2434

* Returns the number of features copied.

* @param fromSeq

* @param toSeq

* @param mapping

* the mapping from 'fromSeq' to 'toSeq'

2440

* @param select

2441

* if not null, only features of this type are copied (including

2442

* subtypes in the Sequence Ontology)

2443

* @param omitting

2444

2445

protected static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,

2446

MapList mapping, String select, String... omitting)

2447

{

2448

SequenceI copyTo = toSeq;

2449

while (copyTo.getDatasetSequence() != null)

2450

{

2451

copyTo = copyTo.getDatasetSequence();

2452

}

2453

if (fromSeq == copyTo || fromSeq.getDatasetSequence() == copyTo)

2454

{

2455

return 0; // shared dataset sequence

}

* get features, optionally restricted by an ontology term

2460

2461

List<SequenceFeature> sfs = select == null

2462

? fromSeq.getFeatures().getPositionalFeatures()

2463

: fromSeq.getFeatures().getFeaturesByOntology(select);

2464

2465

int count = 0;

2466

for (SequenceFeature sf : sfs)

2467

{

2468

String type = sf.getType();

2469

boolean omit = false;

2470

for (String toOmit : omitting)

2471

{

2472

if (type.equals(toOmit))

{

omit = true;

}

}

if (omit)

{

continue;

}

* locate the mapped range - null if either start or end is

2484

* not mapped (no partial overlaps are calculated)

2485

2486

int start = sf.getBegin();

2487

int end = sf.getEnd();

2488

int[] mappedTo = mapping.locateInTo(start, end);

2489

2490

* if whole exon range doesn't map, try interpreting it

2491

* as 5' or 3' exon overlapping the CDS range

2492

2493

if (mappedTo == null)

2494

{

2495

mappedTo = mapping.locateInTo(end, end);

2496

if (mappedTo != null)

2497

{

2498

2499

* end of exon is in CDS range - 5' overlap

2500

* to a range from the start of the peptide

mappedTo[0] = 1;

}

}

if (mappedTo == null)

2506

{

2507

mappedTo = mapping.locateInTo(start, start);

2508

if (mappedTo != null)

2509

{

2510

2511

* start of exon is in CDS range - 3' overlap

2512

* to a range up to the end of the peptide

2513

2514

mappedTo[1] = toSeq.getLength();

2515

}

2516

}

2517

if (mappedTo != null)

2518

{

2519

int newBegin = Math.min(mappedTo[0], mappedTo[1]);

2520

int newEnd = Math.max(mappedTo[0], mappedTo[1]);

2521

SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd,

2522

sf.getFeatureGroup(), sf.getScore());

2523

copyTo.addSequenceFeature(copy);

count++;

}

}

return count;

}

/**

* Returns a mapping from dna to protein by inspecting sequence features of

2532

* type "CDS" on the dna. A mapping is constructed if the total CDS feature

2533

* length is 3 times the peptide length (optionally after dropping a trailing

2534

* stop codon). This method does not check whether the CDS nucleotide sequence

2535

* translates to the peptide sequence.

* @param dnaSeq

* @param proteinSeq

* @return

public static MapList mapCdsToProtein(SequenceI dnaSeq,

2542

SequenceI proteinSeq)

2543

{

2544

List<int[]> ranges = findCdsPositions(dnaSeq);

2545

int mappedDnaLength = MappingUtils.getLength(ranges);

2546

2547

2548

* if not a whole number of codons, truncate mapping

2549

2550

int codonRemainder = mappedDnaLength % CODON_LENGTH;

2551

if (codonRemainder > 0)

2552

{

2553

mappedDnaLength -= codonRemainder;

2554

MappingUtils.removeEndPositions(codonRemainder, ranges);

2555

}

2556

2557

int proteinLength = proteinSeq.getLength();

2558

int proteinStart = proteinSeq.getStart();

2559

int proteinEnd = proteinSeq.getEnd();

2560

2561

2562

* incomplete start codon may mean X at start of peptide

2563

* we ignore both for mapping purposes

2564

2565

if (proteinSeq.getCharAt(0) == 'X')

2566

{

2567

// todo JAL-2022 support startPhase > 0

proteinStart++;

proteinLength--;

}

List<int[]> proteinRange = new ArrayList<>();

2572

2573

2574

* dna length should map to protein (or protein plus stop codon)

2575

2576

int codesForResidues = mappedDnaLength / CODON_LENGTH;

2577

if (codesForResidues == (proteinLength + 1))

2578

{

2579

// assuming extra codon is for STOP and not in peptide

2580

// todo: check trailing codon is indeed a STOP codon

2581

codesForResidues--;

2582

mappedDnaLength -= CODON_LENGTH;

2583

MappingUtils.removeEndPositions(CODON_LENGTH, ranges);

2584

}

2585

2586

if (codesForResidues == proteinLength)

2587

{

2588

proteinRange.add(new int[] { proteinStart, proteinEnd });

2589

return new MapList(ranges, proteinRange, CODON_LENGTH, 1);

}

return null;

}

/**

* Returns a list of CDS ranges found (as sequence positions base 1), i.e. of

2596

* [start, end] positions of sequence features of type "CDS" (or a sub-type of

2597

* CDS in the Sequence Ontology). The ranges are sorted into ascending start

2598

* position order, so this method is only valid for linear CDS in the same

2599

* sense as the protein product.

* @param dnaSeq

* @return

protected static List<int[]> findCdsPositions(SequenceI dnaSeq)

2605

{

2606

List<int[]> result = new ArrayList<>();

2607

2608

List<SequenceFeature> sfs = dnaSeq.getFeatures()

2609

.getFeaturesByOntology(SequenceOntologyI.CDS);

if (sfs.isEmpty())

{

return result;

}

SequenceFeatures.sortFeatures(sfs, true);

2615

2616

for (SequenceFeature sf : sfs)

{

int phase = 0;

try

{

String s = sf.getPhase();

2622

if (s != null)

2623

{

2624

phase = Integer.parseInt(s);

2625

}

2626

} catch (NumberFormatException e)

{

// leave as zero

}

* phase > 0 on first codon means 5' incomplete - skip to the start

2632

* of the next codon; example ENST00000496384

2633

2634

int begin = sf.getBegin();

2635

int end = sf.getEnd();

2636

if (result.isEmpty() && phase > 0)

{

begin += phase;

if (begin > end)

{

// shouldn't happen!

System.err

.println("Error: start phase extends beyond start CDS in "

+ dnaSeq.getName());

}

}

result.add(new int[] { begin, end });

}

* Finally sort ranges by start position. This avoids a dependency on

2652

* keeping features in order on the sequence (if they are in order anyway,

2653

* the sort will have almost no work to do). The implicit assumption is CDS

2654

* ranges are assembled in order. Other cases should not use this method,

2655

* but instead construct an explicit mapping for CDS (e.g. EMBL parsing).

2656

2657

Collections.sort(result, IntRangeComparator.ASCENDING);

return result;

}

/**

* Makes an alignment with a copy of the given sequences, adding in any

2663

* non-redundant sequences which are mapped to by the cross-referenced

* sequences.

* @param seqs

* @param xrefs

* @param dataset

* the alignment dataset shared by the new copy

2670

* @return

2671

2672

public static AlignmentI makeCopyAlignment(SequenceI[] seqs,

2673

SequenceI[] xrefs, AlignmentI dataset)

2674

{

2675

AlignmentI copy = new Alignment(new Alignment(seqs));

2676

copy.setDataset(dataset);

2677

boolean isProtein = !copy.isNucleotide();

2678

SequenceIdMatcher matcher = new SequenceIdMatcher(seqs);

2679

if (xrefs != null)

2680

{

2681

// BH 2019.01.25 recoded to remove iterators

2682

2683

for (int ix = 0, nx = xrefs.length; ix < nx; ix++)

2684

{

2685

SequenceI xref = xrefs[ix];

2686

List<DBRefEntry> dbrefs = xref.getDBRefs();

2687

if (dbrefs != null)

2688

{

2689

for (int ir = 0, nir = dbrefs.size(); ir < nir; ir++)

2690

{

2691

DBRefEntry dbref = dbrefs.get(ir);

2692

Mapping map = dbref.getMap();

2693

SequenceI mto;

2694

if (map == null || (mto = map.getTo()) == null

2695

|| mto.isProtein() != isProtein)

{

continue;

}

SequenceI mappedTo = mto;

2700

SequenceI match = matcher.findIdMatch(mappedTo);

2701

if (match == null)

2702

{

2703

matcher.add(mappedTo);

2704

copy.addSequence(mappedTo);

}

}

}

}

}

return copy;

}

/**

* Try to align sequences in 'unaligned' to match the alignment of their

2715

* mapped regions in 'aligned'. For example, could use this to align CDS

2716

* sequences which are mapped to their parent cDNA sequences.

2717

2718

* This method handles 1:1 mappings (dna-to-dna or protein-to-protein). For

2719

* dna-to-protein or protein-to-dna use alternative methods.

2720

2721

* @param unaligned

2722

* sequences to be aligned

2723

* @param aligned

2724

* holds aligned sequences and their mappings

2725

* @return

2726

2727

public static int alignAs(AlignmentI unaligned, AlignmentI aligned)

2728

{

2729

2730

* easy case - aligning a copy of aligned sequences

2731

2732

if (alignAsSameSequences(unaligned, aligned))

2733

{

2734

return unaligned.getHeight();

}

* fancy case - aligning via mappings between sequences

2739

2740

List<SequenceI> unmapped = new ArrayList<>();

2741

Map<Integer, Map<SequenceI, Character>> columnMap = buildMappedColumnsMap(

2742

unaligned, aligned, unmapped);

2743

int width = columnMap.size();

2744

char gap = unaligned.getGapCharacter();

2745

int realignedCount = 0;

2746

// TODO: verify this loop scales sensibly for very wide/high alignments

2747

2748

for (SequenceI seq : unaligned.getSequences())

2749

{

2750

if (!unmapped.contains(seq))

2751

{

2752

char[] newSeq = new char[width];

2753

Arrays.fill(newSeq, gap); // JBPComment - doubt this is faster than the

2754

// Integer iteration below

int newCol = 0;

int lastCol = 0;

* traverse the map to find columns populated

2760

* by our sequence

2761

2762

for (Integer column : columnMap.keySet())

2763

{

2764

Character c = columnMap.get(column).get(seq);

if (c != null)

{

* sequence has a character at this position

newSeq[newCol] = c;

lastCol = newCol;

}

newCol++;

}

* trim trailing gaps

if (lastCol < width)

{

char[] tmp = new char[lastCol + 1];

2783

System.arraycopy(newSeq, 0, tmp, 0, lastCol + 1);

2784

newSeq = tmp;

2785

}

2786

// TODO: optimise SequenceI to avoid char[]->String->char[]

2787

seq.setSequence(String.valueOf(newSeq));

realignedCount++;

}

}

return realignedCount;

}

/**

* If unaligned and aligned sequences share the same dataset sequences, then

2796

* simply copies the aligned sequences to the unaligned sequences and returns

2797

* true; else returns false

2798

2799

* @param unaligned

2800

* - sequences to be aligned based on aligned

2801

* @param aligned

2802

* - 'guide' alignment containing sequences derived from same dataset

* as unaligned

* @return

static boolean alignAsSameSequences(AlignmentI unaligned,

2807

AlignmentI aligned)

2808

{

2809

if (aligned.getDataset() == null || unaligned.getDataset() == null)

2810

{

2811

return false; // should only pass alignments with datasets here

2812

}

2813

2814

// map from dataset sequence to alignment sequence(s)

2815

Map<SequenceI, List<SequenceI>> alignedDatasets = new HashMap<>();

2816

for (SequenceI seq : aligned.getSequences())

2817

{

2818

SequenceI ds = seq.getDatasetSequence();

2819

if (alignedDatasets.get(ds) == null)

2820

{

2821

alignedDatasets.put(ds, new ArrayList<SequenceI>());

2822

}

2823

alignedDatasets.get(ds).add(seq);

}

* first pass - check whether all sequences to be aligned share a

2828

* dataset sequence with an aligned sequence; also note the leftmost

2829

* ungapped column from which to copy

2830

2831

int leftmost = Integer.MAX_VALUE;

2832

for (SequenceI seq : unaligned.getSequences())

2833

{

2834

final SequenceI ds = seq.getDatasetSequence();

2835

if (!alignedDatasets.containsKey(ds))

{

return false;

}

SequenceI alignedSeq = alignedDatasets.get(ds).get(0);

2840

int startCol = alignedSeq.findIndex(seq.getStart()); // 1..

2841

leftmost = Math.min(leftmost, startCol);

}

* second pass - copy aligned sequences;

2846

* heuristic rule: pair off sequences in order for the case where

2847

* more than one shares the same dataset sequence

2848

2849

final char gapCharacter = aligned.getGapCharacter();

2850

for (SequenceI seq : unaligned.getSequences())

2851

{

2852

List<SequenceI> alignedSequences = alignedDatasets

2853

.get(seq.getDatasetSequence());

2854

if (alignedSequences.isEmpty())

2855

{

2856

2857

* defensive check - shouldn't happen! (JAL-3536)

continue;

}

SequenceI alignedSeq = alignedSequences.get(0);

2862

2863

2864

* gap fill for leading (5') UTR if any

2865

2866

// TODO this copies intron columns - wrong!

2867

int startCol = alignedSeq.findIndex(seq.getStart()); // 1..

2868

int endCol = alignedSeq.findIndex(seq.getEnd());

2869

char[] seqchars = new char[endCol - leftmost + 1];

2870

Arrays.fill(seqchars, gapCharacter);

2871

char[] toCopy = alignedSeq.getSequence(startCol - 1, endCol);

2872

System.arraycopy(toCopy, 0, seqchars, startCol - leftmost,

2873

toCopy.length);

2874

seq.setSequence(String.valueOf(seqchars));

2875

if (alignedSequences.size() > 0)

2876

{

2877

// pop off aligned sequences (except the last one)

2878

alignedSequences.remove(0);

}

}

* finally remove gapped columns (e.g. introns)

2884

2885

new RemoveGapColCommand("", unaligned.getSequencesArray(), 0,

2886

unaligned.getWidth() - 1, unaligned);

return true;

}

/**

* Returns a map whose key is alignment column number (base 1), and whose

2893

* values are a map of sequence characters in that column.

* @param unaligned

* @param aligned

* @param unmapped

* @return

static SortedMap<Integer, Map<SequenceI, Character>> buildMappedColumnsMap(

2901

AlignmentI unaligned, AlignmentI aligned,

2902

List<SequenceI> unmapped)

2903

{

2904

2905

* Map will hold, for each aligned column position, a map of

2906

* {unalignedSequence, characterPerSequence} at that position.

2907

* TreeMap keeps the entries in ascending column order.

2908

2909

SortedMap<Integer, Map<SequenceI, Character>> map = new TreeMap<>();

2910

2911

2912

* record any sequences that have no mapping so can't be realigned

2913

2914

unmapped.addAll(unaligned.getSequences());

2915

2916

List<AlignedCodonFrame> mappings = aligned.getCodonFrames();

2917

2918

for (SequenceI seq : unaligned.getSequences())

2919

{

2920

for (AlignedCodonFrame mapping : mappings)

2921

{

2922

SequenceI fromSeq = mapping.findAlignedSequence(seq, aligned);

2923

if (fromSeq != null)

2924

{

2925

Mapping seqMap = mapping.getMappingBetween(fromSeq, seq);

2926

if (addMappedPositions(seq, fromSeq, seqMap, map))

2927

{

2928

unmapped.remove(seq);

}

}

}

}

return map;

}

/**

* Helper method that adds to a map the mapped column positions of a sequence.

2938

* <br>

2939

* For example if aaTT-Tg-gAAA is mapped to TTTAAA then the map should record

2940

* that columns 3,4,6,10,11,12 map to characters T,T,T,A,A,A of the mapped to

* sequence.

* @param seq

* the sequence whose column positions we are recording

2945

* @param fromSeq

2946

* a sequence that is mapped to the first sequence

2947

* @param seqMap

2948

* the mapping from 'fromSeq' to 'seq'

2949

* @param map

2950

* a map to add the column positions (in fromSeq) of the mapped

* positions of seq

* @return

static boolean addMappedPositions(SequenceI seq, SequenceI fromSeq,

2955

Mapping seqMap, Map<Integer, Map<SequenceI, Character>> map)

{

if (seqMap == null)

{

return false;

}

* invert mapping if it is from unaligned to aligned sequence

2964

2965

if (seqMap.getTo() == fromSeq.getDatasetSequence())

2966

{

2967

seqMap = new Mapping(seq.getDatasetSequence(),

2968

seqMap.getMap().getInverse());

2969

}

2970

2971

int toStart = seq.getStart();

2972

2973

2974

* traverse [start, end, start, end...] ranges in fromSeq

2975

2976

for (int[] fromRange : seqMap.getMap().getFromRanges())

2977

{

2978

for (int i = 0; i < fromRange.length - 1; i += 2)

2979

{

2980

boolean forward = fromRange[i + 1] >= fromRange[i];

2981

2982

2983

* find the range mapped to (sequence positions base 1)

2984

2985

int[] range = seqMap.locateMappedRange(fromRange[i],

fromRange[i + 1]);

if (range == null)

{

jalview.bin.Console.errPrintln("Error in mapping " + seqMap

2990

+ " from " + fromSeq.getName());

2991

return false;

2992

}

2993

int fromCol = fromSeq.findIndex(fromRange[i]);

2994

int mappedCharPos = range[0];

2995

2996

2997

* walk over the 'from' aligned sequence in forward or reverse

2998

* direction; when a non-gap is found, record the column position

2999

* of the next character of the mapped-to sequence; stop when all

3000

* the characters of the range have been counted

3001

3002

while (mappedCharPos <= range[1] && fromCol <= fromSeq.getLength()

3003

&& fromCol >= 0)

3004

{

3005

if (!Comparison.isGap(fromSeq.getCharAt(fromCol - 1)))

3006

{

3007

3008

* mapped from sequence has a character in this column

3009

* record the column position for the mapped to character

3010

3011

Map<SequenceI, Character> seqsMap = map.get(fromCol);

3012

if (seqsMap == null)

3013

{

3014

seqsMap = new HashMap<>();

3015

map.put(fromCol, seqsMap);

3016

}

3017

seqsMap.put(seq, seq.getCharAt(mappedCharPos - toStart));

3018

mappedCharPos++;

3019

}

3020

fromCol += (forward ? 1 : -1);

}

}

}

return true;

}

// strictly temporary hack until proper criteria for aligning protein to cds

3028

// are in place; this is so Ensembl -> fetch xrefs Uniprot aligns the Uniprot

3029

public static boolean looksLikeEnsembl(AlignmentI alignment)

3030

{

3031

for (SequenceI seq : alignment.getSequences())

3032

{

3033

String name = seq.getName();

3034

if (!name.startsWith("ENSG") && !name.startsWith("ENST"))

{

return false;

}

}

return true;

}

public static boolean isSecondaryStructurePresent(

3043

AlignmentAnnotation[] annotations)

3044

{

3045

boolean ssPresent = false;

3046

3047

for (AlignmentAnnotation aa : annotations)

{

if (ssPresent)

{

break;

}

if (Constants.SECONDARY_STRUCTURE_LABELS.containsKey(aa.label))

{

ssPresent = true;

break;

}

}

return ssPresent;

}

public static Color getSecondaryStructureAnnotationColour(char symbol)

3066

{

3067

3068

if (symbol == Constants.COIL)

{

return Color.gray;

}

if (symbol == Constants.SHEET)

{

return Color.green;

}

if (symbol == Constants.HELIX)

{

return Color.red;

}

return Color.white;

}

19832

public static char findSSAnnotationForGivenSeqposition(

3085

AlignmentAnnotation aa, int seqPosition)

3086

{

3087

19832

char ss = '*';

3088

3089

19832

if (aa != null)

3090

{

3091

19832

if (aa.getAnnotationForPosition(seqPosition) != null)

3092

{

3093

6340

Annotation a = aa.getAnnotationForPosition(seqPosition);

3094

6340

ss = a.secondaryStructure;

3095

3096

// There is no representation for coil and it can be either ' ' or null.

3097

6340

if (ss == ' ' || ss == '-')

{

ss = Constants.COIL;

}

}

else

{

13492

ss = Constants.COIL;

}

}

19832

return ss;

3109

}

3110

3111

617

public static List<String> extractSSSourceInAlignmentAnnotation(

3112

AlignmentAnnotation[] annotations)

3113

{

3114

3115

617

List<String> ssSources = new ArrayList<>();

3116

617

Set<String> addedSources = new HashSet<>(); // to keep track of added

3117

// sources

3118

3119

617

if (annotations == null)

{

return ssSources;

}

617

for (AlignmentAnnotation aa : annotations)

3125

{

3126

3127

2106

String ssSource = extractSSSourceFromAnnotationDescription(aa);

3128

3129

2106

if (ssSource != null && !addedSources.contains(ssSource))

3130

{

3131

ssSources.add(ssSource);

3132

addedSources.add(ssSource);

}

}

617

Collections.sort(ssSources);

3137

3138

617

return ssSources;

}

14605

public static String extractSSSourceFromAnnotationDescription(

3143

AlignmentAnnotation aa)

3144

{

3145

3146

14605

for (String label : Constants.SECONDARY_STRUCTURE_LABELS.keySet())

3147

{

3148

3149

16639

if (label.equals(aa.label))

3150

{

3151

3152

12571

if (aa.getProperty(Constants.SS_PROVIDER_PROPERTY) != null)

3153

{

3154

3155

return aa.getProperty(Constants.SS_PROVIDER_PROPERTY);

}

// For JPred

12571

if (Constants.SS_ANNOTATION_FROM_JPRED_LABEL.equals(aa.label))

3161

{

3162

3163

return (Constants.SECONDARY_STRUCTURE_LABELS.get(aa.label));

}

// For input with secondary structure

3168

12571

if (Constants.SS_ANNOTATION_LABEL.equals(aa.label)

3169

&& aa.description != null

3170

&& Constants.SS_ANNOTATION_LABEL.equals(aa.description))

3171

{

3172

3173

return (Constants.SECONDARY_STRUCTURE_LABELS.get(aa.label));

}

// For other sources

12571

if (aa.sequenceRef == null)

{

return null;

}

12567

else if (aa.sequenceRef.getDatasetSequence() == null)

{

return null;

}

12567

Vector<PDBEntry> pdbEntries = aa.sequenceRef.getDatasetSequence()

3187

.getAllPDBEntries();

3188

3189

// TODO: this is an incredibly fragile mechanism

3190

12567

for (PDBEntry entry : pdbEntries)

3191

{

3192

3193

12567

String entryProvider = entry.getProvider();

3194

12567

if (entryProvider == null)

3195

{

3196

// No provider - so this is either an old Jalview project, or not

3197

// retrieved from recognised source

3198

12567

entryProvider = "PDB";

3199

}

3200

3201

// Should (re)use a standard mechanism for extracting the PDB ID as it

3202

// is written 1QWXTUV:CHAIN

3203

// Trim the string from first occurrence of colon

3204

12567

String entryID = entry.getId();

3205

12567

int index = entryID.indexOf(':');

3206

3207

// Check if colon exists

3208

12567

if (index != -1)

3209

{

3210

3211

// Trim the string from first occurrence of colon

3212

entryID = entryID.substring(0, index);

}

// TODO: shouldn't need to extract from description what the

3217

// originating ID is for this annotation!

3218

12567

if (entryProvider == "PDB" && aa.description.toLowerCase()

3219

.contains("secondary structure for "

3220

+ entryID.toLowerCase()))

3221

{

3222

3223

12567

return entryProvider;

}

else if (entryProvider != "PDB" && aa.description.toLowerCase()

3228

.contains(entryID.toLowerCase()))

3229

{

3230

3231

return entryProvider;

}

}

}

}

2034

return null;

}

// to do set priority for labels

3244

287156

public static List<AlignmentAnnotation> getAlignmentAnnotationForSource(

3245

SequenceI seq, String ssSource)

3246

{

3247

3248

287156

List<AlignmentAnnotation> ssAnnots = new ArrayList<AlignmentAnnotation>();

3249

287156

for (String ssLabel : Constants.SECONDARY_STRUCTURE_LABELS.keySet())

3250

{

3251

3252

574312

AlignmentAnnotation[] aa = seq.getAnnotation(ssLabel);

3253

574312

if (aa != null)

3254

{

3255

3256

24998

if (Constants.SS_ALL_PROVIDERS.equals(ssSource))

3257

{

3258

12499

ssAnnots.addAll(Arrays.asList(aa));

3259

12499

continue;

3260

}

3261

3262

12499

for (AlignmentAnnotation annot : aa)

3263

{

3264

3265

12499

String ssSourceForAnnot = extractSSSourceFromAnnotationDescription(

3266

annot);

3267

12499

if (ssSourceForAnnot != null && ssSource.equals(ssSourceForAnnot))

3268

{

3269

12499

ssAnnots.add(annot);

}

}

}

}

287156

if (ssAnnots.size() > 0)

3275

{

3276

24998

return ssAnnots;

3277

}

3278

3279

262158

return null;

}

public static Map<SequenceI, ArrayList<AlignmentAnnotation>> getSequenceAssociatedAlignmentAnnotations(

3284

AlignmentAnnotation[] alignAnnotList, String selectedSSSource)

3285

{

3286

3287

Map<SequenceI, ArrayList<AlignmentAnnotation>> ssAlignmentAnnotationForSequences = new HashMap<SequenceI, ArrayList<AlignmentAnnotation>>();

3288

if (alignAnnotList == null || alignAnnotList.length == 0)

3289

{

3290

return ssAlignmentAnnotationForSequences;

3291

}

3292

3293

for (AlignmentAnnotation aa : alignAnnotList)

3294

{

3295

if (aa.sequenceRef == null)

{

continue;

}

if (isSecondaryStructureFrom(selectedSSSource, aa))

3301

{

3302

ssAlignmentAnnotationForSequences

3303

.computeIfAbsent(aa.sequenceRef.getDatasetSequence(),

3304

k -> new ArrayList<>())

.add(aa);

}

}

return ssAlignmentAnnotationForSequences;

}

/**

* @param selectedSSSource

3316

* @param aa

3317

* @return true if aa is from a provider or all providers as specified by

3318

* selectedSSSource

3319

3320

public static boolean isSecondaryStructureFrom(String selectedSSSource,

3321

AlignmentAnnotation aa)

3322

{

3323

3324

for (String label : Constants.SECONDARY_STRUCTURE_LABELS.keySet())

3325

{

3326

3327

if (label.equals(aa.label))

3328

{

3329

3330

if (selectedSSSource.equals(Constants.SS_ALL_PROVIDERS))

{

return true;

}

String ssSource = AlignmentUtils

3335

.extractSSSourceFromAnnotationDescription(aa);

3336

if (ssSource != null && ssSource.equals(selectedSSSource))

{

return true;

}

}

}

return false;

}

// Method to get the key for a given provider value

3346

public static String getSecondaryStructureProviderKey(String providerValue) {

3347

for (Map.Entry<String, String> entry : Constants.STRUCTURE_PROVIDERS.entrySet()) {

3348

if (entry.getValue().equals(providerValue)) {

3349

return entry.getKey(); // Return the key (abbreviation) for the matching provider value

3350

}

3351

}

3352

return null; // Return null if no match is found

3353

}

3354

3355

public static String reduceLabelLength(String label) {

3356

// Split the input by " | "

3357

String[] parts = label.split(" \\| ");

3358

3359

// Map the full names to their abbreviations

3360

String reducedLabel = Arrays.stream(parts)

3361

.map(fullName -> Constants.STRUCTURE_PROVIDERS.entrySet().stream()

3362

.filter(entry -> entry.getValue().equals(fullName))

3363

.map(Map.Entry::getKey)

3364

.findFirst()

3365

.orElse(fullName)) // Use fullName if no abbreviation is found

3366

.collect(Collectors.joining(" | "));

3367

3368

return reducedLabel; // Return the reduced label if abbreviations were applied

3369

}

3370

3371

public static Color getSecondaryStructureProviderColor(String label) {

3372

3373

//return Constants.STRUCTURE_PROVIDERS_COLOR.getOrDefault(label, Color.BLACK);

3374

Color c = Constants.STRUCTURE_PROVIDERS_COLOR.get(label.trim());

if(c==null)

c = Color.BLACK;

return c;

}

public static void assignSecondaryStructureProviderColor(Map<String, Color> secondaryStructureProviderColorMap,

3382

List<String> labels) {

3383

3384

// Use a Set to track unique labels

3385

Set<String> uniqueLabels = new HashSet<>(labels);

3386

3387

Color[] palette = ColorBrewer.Paired.getColorPalette(uniqueLabels.size());

3388

3389

3390

List<Color> colorList = new ArrayList<>();

3391

Collections.addAll(colorList, palette);

3392

Collections.shuffle(colorList);

3393

int i = 0;

3394

3395

// Loop through each unique label and add it to the map with a color.

3396

for (String label : uniqueLabels) {

3397

// Generate or retrieve a color for the label.

3398

secondaryStructureProviderColorMap.put(label.toUpperCase().trim(), colorList.get(i));

i++;

}

}

}

Coverage Report

File AlignmentUtils.java

Coverage histogram

Code metrics

Classes

Class AlignmentUtils

Class AlignmentUtils.DnaVariant

Contributing tests

Contributing tests

Source view