File AlignmentUtils.java

Branches:

440

Statements:

888

Methods:

Classes:

LOC:

3,087

NCLOC:

1,882

Total complexity:

332

Complexity density:

0.37

Statements/Method:

16.44

Methods/Class:

Average method complexity:

6.15

Classes

Class	Line #	Total Statements	Complexity	TOTAL Coverage	Actions
AlignmentUtils	82	882	326	0.804824680.5%
AlignmentUtils.DnaVariant	98	6	6	0.00%

Class AlignmentUtils

Class AlignmentUtils	Line # 82	Total Statements 882	Complexity 326	TOTAL Coverage 0.804824680.5%
expandContext(AlignmentI,int) : AlignmentI expandContext(AlignmentI,int) : AlignmentI	139139	46.046	10.010	0.983871 0.98387198.4%
getSequenceIndex(AlignmentI,SequenceI) : int getSequenceIndex(AlignmentI,SequenceI) : int	245245	8.08	2.02	1.0 1.0100%
getSequencesByName(AlignmentI) : Map<String, List<SequenceI>> getSequencesByName(AlignmentI) : Map<String, List<SequenceI>>	268268	10.010	3.03	0.9285714 0.928571492.9%
mapProteinAlignmentToCdna(AlignmentI,AlignmentI) : boolean mapProteinAlignmentToCdna(AlignmentI,AlignmentI) : boolean	299299	7.07	3.03	0.7777778 0.777777877.8%
mapProteinToCdna(AlignmentI,AlignmentI,Set<SequenceI>,Set<SequenceI>,boolean) : boolean mapProteinToCdna(AlignmentI,AlignmentI,Set<SequenceI>,Set<SequenceI>,boolean) : boolean	341341	22.022	9.09	0.9375 0.937593.8%
mappingExists(List<AlignedCodonFrame>,SequenceI,SequenceI) : boolean mappingExists(List<AlignedCodonFrame>,SequenceI,SequenceI) : boolean	408408	5.05	3.03	0.6666667 0.666666766.7%
mapCdnaToProtein(SequenceI,SequenceI) : MapList mapCdnaToProtein(SequenceI,SequenceI) : MapList	441441	28.028	12.012	0.95238096 0.9523809695.2%
translatesAs(char[],int,char[]) : boolean translatesAs(char[],int,char[]) : boolean	531531	21.021	14.014	1.0 1.0100%
alignSequenceAs(SequenceI,AlignmentI,String,boolean,boolean) : boolean alignSequenceAs(SequenceI,AlignmentI,String,boolean,boolean) : boolean	608608	14.014	5.05	0.9 0.990%
alignSequenceAs(SequenceI,SequenceI,AlignedCodonFrame,String,char,boolean,boolean) : void alignSequenceAs(SequenceI,SequenceI,AlignedCodonFrame,String,char,boolean,boolean) : void	664664	60.060	20.020	1.0 1.0100%
calculateGapsToInsert(boolean,boolean,int,boolean,int,int,boolean) : int calculateGapsToInsert(boolean,boolean,int,boolean,int,int,boolean) : int	829829	15.015	10.010	1.0 1.0100%
alignProteinAsDna(AlignmentI,AlignmentI) : int alignProteinAsDna(AlignmentI,AlignmentI) : int	893893	6.06	3.03	0.625 0.62562.5%
alignCdsAsProtein(AlignmentI,AlignmentI) : int alignCdsAsProtein(AlignmentI,AlignmentI) : int	919919	18.018	5.05	0.8333333 0.833333383.3%
alignCdsSequenceAsProtein(SequenceI,AlignmentI,List<AlignedCodonFrame>,char) : boolean alignCdsSequenceAsProtein(SequenceI,AlignmentI,List<AlignedCodonFrame>,char) : boolean	965965	48.048	16.016	0.67105263 0.6710526367.1%
buildCodonColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : Map<AlignedCodon, Map<SequenceI, AlignedCodon>> buildCodonColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : Map<AlignedCodon, Map<SequenceI, AlignedCodon>>	10961096	13.013	2.02	1.0 1.0100%
addUnmappedPeptideStarts(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,int) : void addUnmappedPeptideStarts(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,int) : void	11521152	23.023	6.06	0.93939394 0.9393939493.9%
alignProteinAs(AlignmentI,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,List<SequenceI>) : int alignProteinAs(AlignmentI,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,List<SequenceI>) : int	12351235	17.017	2.02	1.0 1.0100%
addCodonPositions(SequenceI,SequenceI,char,Mapping,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>) : void addCodonPositions(SequenceI,SequenceI,char,Mapping,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>) : void	13001300	5.05	4.04	1.0 1.0100%
addCodonToMap(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,AlignedCodon,SequenceI) : void addCodonToMap(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,AlignedCodon,SequenceI) : void	13331333	5.05	2.02	1.0 1.0100%
isMappable(AlignmentI,AlignmentI) : boolean isMappable(AlignmentI,AlignmentI) : boolean	13631363	12.012	7.07	1.0 1.0100%
isMappable(SequenceI,SequenceI,List<AlignedCodonFrame>) : boolean isMappable(SequenceI,SequenceI,List<AlignedCodonFrame>) : boolean	14021402	8.08	6.06	0.625 0.62562.5%
findAddableReferenceAnnotations(List<SequenceI>,Map<String, String>,Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI) : void findAddableReferenceAnnotations(List<SequenceI>,Map<String, String>,Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI) : void	14491449	24.024	9.09	0.95 0.9595%
addReferenceAnnotations(Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI,SequenceGroup) : void addReferenceAnnotations(Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI,SequenceGroup) : void	15321532	3.03	1.01	1.0 1.0100%
isSSAnnotationPresent(Map<SequenceI, List<AlignmentAnnotation>>) : boolean isSSAnnotationPresent(Map<SequenceI, List<AlignmentAnnotation>>) : boolean	15451545	4.04	2.02	0.0 0.00%
addReferenceAnnotationTo(AlignmentI,SequenceI,AlignmentAnnotation,SequenceGroup) : AlignmentAnnotation addReferenceAnnotationTo(AlignmentI,SequenceI,AlignmentAnnotation,SequenceGroup) : AlignmentAnnotation	15741574	16.016	4.04	0.95454544 0.9545454495.5%
showOrHideSequenceAnnotations(AlignmentI,Collection<String>,List<SequenceI>,boolean,boolean) : void showOrHideSequenceAnnotations(AlignmentI,Collection<String>,List<SequenceI>,boolean,boolean) : void	16291629	6.06	7.07	0.9166667 0.916666791.7%
getFirstSequenceAnnotationOfType(AlignmentI,int) : AlignmentAnnotation getFirstSequenceAnnotationOfType(AlignmentI,int) : AlignmentAnnotation	16501650	6.06	4.04	0.0 0.00%
haveCrossRef(SequenceI,SequenceI) : boolean haveCrossRef(SequenceI,SequenceI) : boolean	16721672	1.01	1.01	1.0 1.0100%
hasCrossRef(SequenceI,SequenceI) : boolean hasCrossRef(SequenceI,SequenceI) : boolean	16871687	11.011	6.06	1.0 1.0100%
makeCdsAlignment(SequenceI[],AlignmentI,SequenceI[]) : AlignmentI makeCdsAlignment(SequenceI[],AlignmentI,SequenceI[]) : AlignmentI	17281728	66.066	16.016	0.8913044 0.891304489.1%
transferGeneLoci(SequenceI,MapList,SequenceI) : void transferGeneLoci(SequenceI,MapList,SequenceI) : void	19551955	8.08	4.04	0.9285714 0.928571492.9%
findCdsForProtein(List<AlignedCodonFrame>,SequenceI,List<AlignedCodonFrame>,Mapping) : SequenceI findCdsForProtein(List<AlignedCodonFrame>,SequenceI,List<AlignedCodonFrame>,Mapping) : SequenceI	19931993	19.019	11.011	0.9354839 0.935483993.5%
makeCdsSequence(SequenceI,Mapping,AlignmentI) : SequenceI makeCdsSequence(SequenceI,Mapping,AlignmentI) : SequenceI	20802080	32.032	10.010	0.62 0.6262%
propagateDBRefsToCDS(SequenceI,SequenceI,SequenceI,Mapping) : List<DBRefEntry> propagateDBRefsToCDS(SequenceI,SequenceI,SequenceI,Mapping) : List<DBRefEntry>	21732173	28.028	11.011	0.8863636 0.886363688.6%
transferFeatures(SequenceI,SequenceI,MapList,String,String) : int transferFeatures(SequenceI,SequenceI,MapList,String,String) : int	22612261	33.033	12.012	0.8867925 0.886792588.7%
mapCdsToProtein(SequenceI,SequenceI) : MapList mapCdsToProtein(SequenceI,SequenceI) : MapList	23572357	22.022	5.05	1.0 1.0100%
findCdsPositions(SequenceI) : List<int[]> findCdsPositions(SequenceI) : List<int[]>	24202420	20.020	7.07	0.9285714 0.928571492.9%
makeCopyAlignment(SequenceI[],SequenceI[],AlignmentI) : AlignmentI makeCopyAlignment(SequenceI[],SequenceI[],AlignmentI) : AlignmentI	24882488	21.021	9.09	0.0 0.00%
alignAs(AlignmentI,AlignmentI) : int alignAs(AlignmentI,AlignmentI) : int	25432543	26.026	5.05	0.88235295 0.8823529588.2%
alignAsSameSequences(AlignmentI,AlignmentI) : boolean alignAsSameSequences(AlignmentI,AlignmentI) : boolean	26222622	33.033	7.07	0.88372093 0.8837209388.4%
buildMappedColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : SortedMap<Integer, Map<SequenceI, Character>> buildMappedColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : SortedMap<Integer, Map<SequenceI, Character>>	27162716	11.011	3.03	0.93333334 0.9333333493.3%
addMappedPositions(SequenceI,SequenceI,Mapping,Map<Integer, Map<SequenceI, Character>>) : boolean addMappedPositions(SequenceI,SequenceI,Mapping,Map<Integer, Map<SequenceI, Character>>) : boolean	27702770	24.024	11.011	0.8 0.880%
looksLikeEnsembl(AlignmentI) : boolean looksLikeEnsembl(AlignmentI) : boolean	28452845	5.05	3.03	0.71428573 0.7142857371.4%
getSecondaryStructureSources(AlignmentAnnotation[]) : List<String> getSecondaryStructureSources(AlignmentAnnotation[]) : List<String>	28582858	8.08	3.03	1.0 1.0100%
isSecondaryStructurePresent(AlignmentAnnotation[]) : boolean isSecondaryStructurePresent(AlignmentAnnotation[]) : boolean	28792879	8.08	3.03	0.8333333 0.833333383.3%
getSecondaryStructureAnnotationColour(char) : Color getSecondaryStructureAnnotationColour(char) : Color	29022902	7.07	4.04	0.0 0.00%
findSSAnnotationForGivenSeqposition(AlignmentAnnotation,int) : char findSSAnnotationForGivenSeqposition(AlignmentAnnotation,int) : char	29212921	9.09	5.05	0.0 0.00%
extractSSSourceInAlignmentAnnotation(AlignmentAnnotation[]) : List<String> extractSSSourceInAlignmentAnnotation(AlignmentAnnotation[]) : List<String>	29482948	9.09	3.03	0.0 0.00%
extractSSSourceFromAnnotationDescription(AlignmentAnnotation) : String extractSSSourceFromAnnotationDescription(AlignmentAnnotation) : String	29742974	24.024	13.013	0.0 0.00%
getDisplayedAlignmentAnnotation(SequenceI) : AlignmentAnnotation getDisplayedAlignmentAnnotation(SequenceI) : AlignmentAnnotation	30623062	7.07	3.03	0.45454547 0.4545454745.5%

Class AlignmentUtils.DnaVariant

Class AlignmentUtils.DnaVariant	Line # 98	Total Statements 6	Complexity 6	TOTAL Coverage 0.00%
DnaVariant(String) DnaVariant(String)	104104	2.02	1.01	0.0 0.00%
DnaVariant(String,SequenceFeature) DnaVariant(String,SequenceFeature)	110110	2.02	1.01	0.0 0.00%
getSource() : String getSource() : String	116116	1.01	2.02	0.0 0.00%
toString() : String toString() : String	124124	1.01	2.02	0.0 0.00%

Contributing tests

This file is covered by 140 tests.

Contributing tests

Test contribution	Test	Result
0.29232997	jalview.io.CrossRef2xmlTests.openCrossrefsForEnsemblTwicejalview.io.CrossRef2xmlTests.openCrossrefsForEnsemblTwice	1PASS
0.13314037	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignmentjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment	1PASS
0.10636758	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withXrefs	1PASS
0.10636758	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withStartAndStopCodonsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withStartAndStopCodons	1PASS
0.095513746	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_multipleProteinsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_multipleProteins	1PASS
0.09479016	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_filterProductsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_filterProducts	1PASS
0.09261939	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_noXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_noXrefs	1PASS
0.090448625	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_alternativeTranscriptsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_alternativeTranscripts	1PASS
0.0788712	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_prioritiseXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_prioritiseXrefs	1PASS
0.07742402	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withIntronsjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withIntrons	1PASS
0.07452967	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_noIntronsjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_noIntrons	1PASS
0.070188135	jalview.io.FeaturesFileTest.simpleGff3RelaxedIdMatchingjalview.io.FeaturesFileTest.simpleGff3RelaxedIdMatching	1PASS
0.070188135	jalview.io.FeaturesFileTest.readGff3Filejalview.io.FeaturesFileTest.readGff3File	1PASS
0.070188135	jalview.io.FeaturesFileTest.simpleGff3FileClassjalview.io.FeaturesFileTest.simpleGff3FileClass	1PASS
0.070188135	jalview.io.FeaturesFileTest.simpleGff3FileLoaderjalview.io.FeaturesFileTest.simpleGff3FileLoader	1PASS
0.06729378	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_keepIntronGapsOnlyjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_keepIntronGapsOnly	1PASS
0.06801736	jalview.datamodel.AlignmentTest.testAlignAs_dnaAsDnajalview.datamodel.AlignmentTest.testAlignAs_dnaAsDna	1PASS
0.066570185	jalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTestjalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTest	1PASS
0.066570185	jalview.bin.CommandsTest.commandsOpenTestjalview.bin.CommandsTest.commandsOpenTest	1PASS
0.066570185	jalview.analysis.AlignmentUtilsTests.testAlignAs_alternateTranscriptsUngappedjalview.analysis.AlignmentUtilsTests.testAlignAs_alternateTranscriptsUngapped	1PASS
0.066570185	jalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTestjalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTest	1PASS
0.066570185	jalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTestjalview.bin.CommandsTest.argFilesGlobAndSubstitutionsTest	1PASS
0.06439942	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withUnmappedProteinjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withUnmappedProtein	1PASS
0.06512301	jalview.bin.CommandsTest.commandsOpenTestjalview.bin.CommandsTest.commandsOpenTest	1PASS
0.063675836	jalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna_incompleteStartCodonjalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna_incompleteStartCodon	1PASS
0.060057886	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withTrailingPeptidejalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withTrailingPeptide	1PASS
0.05788712	jalview.analysis.AlignmentUtilsTests.testAlignProteinAsDnajalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna	1PASS
0.057163533	jalview.datamodel.AlignmentTest.testAlignAs_proteinAsCdnajalview.datamodel.AlignmentTest.testAlignAs_proteinAsCdna	1PASS
0.054992765	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_mappedProteinProteinjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_mappedProteinProtein	1PASS
0.052821998	jalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProteinjalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProtein	1PASS
0.053545587	jalview.analysis.AlignmentUtilsTests.testIsMappablejalview.analysis.AlignmentUtilsTests.testIsMappable	1PASS
0.05065123	jalview.bin.CommandsTest.structureImageOutputTestjalview.bin.CommandsTest.structureImageOutputTest	1PASS
0.05137482	jalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProtein_singleSequencejalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProtein_singleSequence	1PASS
0.05065123	jalview.bin.CommandsTest.structureImageOutputTestjalview.bin.CommandsTest.structureImageOutputTest	1PASS
0.045586105	jalview.bin.CommandsTest2.structureOpeningArgsTestjalview.bin.CommandsTest2.structureOpeningArgsTest	1PASS
0.04413893	jalview.bin.CommandsTest2.structureOpeningArgsTestjalview.bin.CommandsTest2.structureOpeningArgsTest	1PASS
0.04413893	jalview.bin.CommandsTest2.structureOpeningArgsTestjalview.bin.CommandsTest2.structureOpeningArgsTest	1PASS
0.042691752	jalview.gui.StructureChooserTest.openStructureFileForSequenceTestjalview.gui.StructureChooserTest.openStructureFileForSequenceTest	1PASS
0.042691752	jalview.gui.StructureChooserTest.openStructureFileForSequenceTestjalview.gui.StructureChooserTest.openStructureFileForSequenceTest	1PASS
0.042691752	jalview.bin.CommandsTest.structureImageAnnotationsOutputTestjalview.bin.CommandsTest.structureImageAnnotationsOutputTest	1PASS
0.042691752	jalview.bin.CommandsTest2.structureOpeningArgsTestjalview.bin.CommandsTest2.structureOpeningArgsTest	1PASS
0.042691752	jalview.bin.CommandsTest.structureImageAnnotationsOutputTestjalview.bin.CommandsTest.structureImageAnnotationsOutputTest	1PASS
0.042691752	jalview.bin.CommandsTest.structureImageAnnotationsOutputTestjalview.bin.CommandsTest.structureImageAnnotationsOutputTest	1PASS
0.041244574	jalview.gui.AnnotationLabelsTest2.testIdWidthNoChangesjalview.gui.AnnotationLabelsTest2.testIdWidthNoChanges	1PASS
0.041244574	jalview.gui.AnnotationLabelsTest2.testIdWidthChangesjalview.gui.AnnotationLabelsTest2.testIdWidthChanges	1PASS
0.041244574	jalview.gui.AnnotationLabelsTest2.testIdWidthNoChangesjalview.gui.AnnotationLabelsTest2.testIdWidthNoChanges	1PASS
0.041968163	jalview.analysis.AlignmentUtilsTests.testExpandContextjalview.analysis.AlignmentUtilsTests.testExpandContext	1PASS
0.042691752	jalview.gui.StructureChooserTest.openStructureFileForSequenceTestjalview.gui.StructureChooserTest.openStructureFileForSequenceTest	1PASS
0.042691752	jalview.bin.CommandsTest.structureImageAnnotationsOutputTestjalview.bin.CommandsTest.structureImageAnnotationsOutputTest	1PASS
0.042691752	jalview.bin.CommandsTest2.structureOpeningArgsTestjalview.bin.CommandsTest2.structureOpeningArgsTest	1PASS
0.042691752	jalview.gui.StructureChooserTest.openStructureFileForSequenceTestjalview.gui.StructureChooserTest.openStructureFileForSequenceTest	1PASS
0.040520985	jalview.analysis.AlignmentUtilsTests.testMapCdsToProteinjalview.analysis.AlignmentUtilsTests.testMapCdsToProtein	1PASS
0.041244574	jalview.gui.AnnotationLabelsTest2.testIdWidthChangesjalview.gui.AnnotationLabelsTest2.testIdWidthChanges	1PASS
0.039797395	jalview.analysis.AlignmentUtilsTests.testAddReferenceContactMapjalview.analysis.AlignmentUtilsTests.testAddReferenceContactMap	1PASS
0.039797395	jalview.bin.CommandsTest.structureImageOutputTestjalview.bin.CommandsTest.structureImageOutputTest	1PASS
0.039797395	jalview.bin.CommandsTest.structureImageOutputTestjalview.bin.CommandsTest.structureImageOutputTest	1PASS
0.03545586	jalview.analysis.AlignmentUtilsTests.testAddReferenceAnnotationsjalview.analysis.AlignmentUtilsTests.testAddReferenceAnnotations	1PASS
0.03400868	jalview.analysis.AlignmentUtilsTests.testExpandContext_annotationjalview.analysis.AlignmentUtilsTests.testExpandContext_annotation	1PASS
0.031837918	jalview.analysis.AlignmentUtilsTests.testMapCdnaToProtein_forSubsequencejalview.analysis.AlignmentUtilsTests.testMapCdnaToProtein_forSubsequence	1PASS
0.028219972	jalview.analysis.AlignmentUtilsTests.testTransferFeaturesjalview.analysis.AlignmentUtilsTests.testTransferFeatures	1PASS
0.026772793	jalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesMultipleSubSeqjalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesMultipleSubSeq	1PASS
0.027496383	jalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesjalview.analysis.AlignmentUtilsTests.testAlignAsSameSequences	1PASS
0.027496383	jalview.analysis.AlignmentUtilsTests.testTranslatesAsjalview.analysis.AlignmentUtilsTests.testTranslatesAs	1PASS
0.026049204	jalview.analysis.AlignmentUtilsTests.testTransferFeatures_withOmitjalview.analysis.AlignmentUtilsTests.testTransferFeatures_withOmit	1PASS
0.023154847	jalview.analysis.AlignmentUtilsTests.testAddMappedPositions_withStopCodonjalview.analysis.AlignmentUtilsTests.testAddMappedPositions_withStopCodon	1PASS
0.023154847	jalview.analysis.AlignmentUtilsTests.testAddMappedPositionsjalview.analysis.AlignmentUtilsTests.testAddMappedPositions	1PASS
0.02170767	jalview.analysis.AlignmentUtilsTests.testTransferFeatures_withSelectjalview.analysis.AlignmentUtilsTests.testTransferFeatures_withSelect	1PASS
0.020984082	jalview.project.Jalview2xmlTests.testPAEsaveRestorejalview.project.Jalview2xmlTests.testPAEsaveRestore	1PASS
0.020260492	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_notOnAlignmentjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_notOnAlignment	1PASS
0.020984082	jalview.analysis.AlignmentUtilsTests.testFindCdsForProtein_noUTRjalview.analysis.AlignmentUtilsTests.testFindCdsForProtein_noUTR	1PASS
0.019536903	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_alreadyAddedjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_alreadyAdded	1PASS
0.020260492	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenujalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu	1PASS
0.018089725	jalview.analysis.AlignmentUtilsTests.testFindCdsPositions_fivePrimeIncompletejalview.analysis.AlignmentUtilsTests.testFindCdsPositions_fivePrimeIncomplete	1PASS
0.018089725	jalview.analysis.AlignmentUtilsTests.testFindCdsForProteinjalview.analysis.AlignmentUtilsTests.testFindCdsForProtein	1PASS
0.013748191	jalview.analysis.AlignmentUtilsTests.testFindCdsPositionsjalview.analysis.AlignmentUtilsTests.testFindCdsPositions	1PASS
0.01447178	jalview.analysis.AlignmentUtilsTests.testHaveCrossRefjalview.analysis.AlignmentUtilsTests.testHaveCrossRef	1PASS
0.01447178	jalview.analysis.AlignmentUtilsTests.testHasCrossRefjalview.analysis.AlignmentUtilsTests.testHasCrossRef	1PASS
0.015195369	jalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSourcesjalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSources	1PASS
0.015195369	jalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSourcesjalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSources	1PASS
0.012301013	jalview.ext.jmol.JmolViewerTest.testAddStrToSingleSeqViewJMoljalview.ext.jmol.JmolViewerTest.testAddStrToSingleSeqViewJMol	1PASS
0.012301013	jalview.project.Jalview2xmlTests.testCopyViewSettingsjalview.project.Jalview2xmlTests.testCopyViewSettings	1PASS
0.012301013	jalview.project.Jalview2xmlTests.testColourByAnnotScoresjalview.project.Jalview2xmlTests.testColourByAnnotScores	1PASS
0.012301013	jalview.project.Jalview2xmlTests.testStoreAndRecoverColourThresholdsjalview.project.Jalview2xmlTests.testStoreAndRecoverColourThresholds	1PASS
0.012301013	jalview.gui.AlignFrameTest.testNewView_colourThresholdsjalview.gui.AlignFrameTest.testNewView_colourThresholds	1PASS
0.013024602	jalview.datamodel.PAEContactMatrixTest.testSeqAssociatedPAEMatrixjalview.datamodel.PAEContactMatrixTest.testSeqAssociatedPAEMatrix	1PASS
0.011577424	jalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSourcesjalview.analysis.AlignmentUtilsTests.testSecondaryStructurePresentAndSources	1PASS
0.010130246	jalview.analysis.AlignmentUtilsTests.testTransferGeneLocijalview.analysis.AlignmentUtilsTests.testTransferGeneLoci	1PASS
0.010130246	jalview.analysis.AlignmentUtilsTests.testGetSequencesByNamejalview.analysis.AlignmentUtilsTests.testGetSequencesByName	1PASS
0.0079594795	jalview.io.AnnotatedPDBFileInputTest.testJalviewProjectRelocationAnnotationjalview.io.AnnotatedPDBFileInputTest.testJalviewProjectRelocationAnnotation	1PASS
0.0079594795	jalview.gui.AnnotationChooserTest.testSelectType_showForSelectedjalview.gui.AnnotationChooserTest.testSelectType_showForSelected	1PASS
0.0079594795	jalview.gui.AnnotationChooserTest.testIsInActionScope_unselectedScopejalview.gui.AnnotationChooserTest.testIsInActionScope_unselectedScope	1PASS
0.0079594795	jalview.project.Jalview2xmlTests.testTCoffeeScoresjalview.project.Jalview2xmlTests.testTCoffeeScores	1PASS
0.0079594795	jalview.gui.AnnotationColumnChooserTest.testResetjalview.gui.AnnotationColumnChooserTest.testReset	1PASS
0.0079594795	jalview.gui.AnnotationChooserTest.testIsInActionScope_selectedScopejalview.gui.AnnotationChooserTest.testIsInActionScope_selectedScope	1PASS
0.0079594795	jalview.gui.ColourMenuHelperTest.testAddMenuItems_nucleotidejalview.gui.ColourMenuHelperTest.testAddMenuItems_nucleotide	1PASS
0.0079594795	jalview.gui.PopupMenuTest.testHideInsertionsjalview.gui.PopupMenuTest.testHideInsertions	1PASS
0.0079594795	jalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcFirstjalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcFirst	1PASS
0.008683068	jalview.analysis.AlignmentUtilsTests.testShowOrHideSequenceAnnotationsjalview.analysis.AlignmentUtilsTests.testShowOrHideSequenceAnnotations	1PASS
0.0079594795	jalview.gui.AnnotationChooserTest.testBuildApplyToOptionsPanel_withSelectionGroupjalview.gui.AnnotationChooserTest.testBuildApplyToOptionsPanel_withSelectionGroup	1PASS
0.0079594795	jalview.analysis.AnnotationSorterTest.testSort_timingSemisortedjalview.analysis.AnnotationSorterTest.testSort_timingSemisorted	1PASS
0.0079594795	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noReferenceAnnotationsjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noReferenceAnnotations	1PASS
0.0079594795	jalview.gui.AnnotationChooserTest.testSelectType_showForAlljalview.gui.AnnotationChooserTest.testSelectType_showForAll	1PASS
0.0079594795	jalview.analysis.AnnotationSorterTest.testSort_timingUnsortedjalview.analysis.AnnotationSorterTest.testSort_timingUnsorted	1PASS
0.0079594795	jalview.gui.AnnotationChooserTest.testDeselectType_showForSelectedjalview.gui.AnnotationChooserTest.testDeselectType_showForSelected	1PASS
0.0079594795	jalview.project.Jalview2xmlTests.testRNAStructureRecoveryjalview.project.Jalview2xmlTests.testRNAStructureRecovery	1PASS
0.0079594795	jalview.analysis.AnnotationSorterTest.testNoSort_autocalcFirstjalview.analysis.AnnotationSorterTest.testNoSort_autocalcFirst	1PASS
0.0079594795	jalview.gui.AnnotationChooserTest.testDeselectType_showForAlljalview.gui.AnnotationChooserTest.testDeselectType_showForAll	1PASS
0.0079594795	jalview.gui.AnnotationChooserTest.testDeselectType_hideForAlljalview.gui.AnnotationChooserTest.testDeselectType_hideForAll	1PASS
0.0079594795	jalview.gui.AnnotationChooserTest.testDeselectType_hideForSelectedjalview.gui.AnnotationChooserTest.testDeselectType_hideForSelected	1PASS
0.0079594795	jalview.ext.jmol.JmolParserTest.testAlignmentLoaderjalview.ext.jmol.JmolParserTest.testAlignmentLoader	1PASS
0.0079594795	jalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcLastjalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcLast	1PASS
0.0079594795	jalview.ext.jmol.JmolViewerTest.testSingleSeqViewJMoljalview.ext.jmol.JmolViewerTest.testSingleSeqViewJMol	1PASS
0.0079594795	jalview.project.Jalview2xmlTests.testStoreAndRecoverPDBEntryjalview.project.Jalview2xmlTests.testStoreAndRecoverPDBEntry	1PASS
0.0079594795	jalview.gui.AnnotationChooserTest.testSelectType_hideForSelectedjalview.gui.AnnotationChooserTest.testSelectType_hideForSelected	1PASS
0.0079594795	jalview.io.AnnotatedPDBFileInputTest.checkPDBSequenceFeaturesjalview.io.AnnotatedPDBFileInputTest.checkPDBSequenceFeatures	1PASS
0.0079594795	jalview.gui.AnnotationChooserTest.testSelectType_hideForAlljalview.gui.AnnotationChooserTest.testSelectType_hideForAll	1PASS
0.0079594795	jalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcLastjalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcLast	1PASS
0.0079594795	jalview.analysis.AnnotationSorterTest.testSort_timingPresortedjalview.analysis.AnnotationSorterTest.testSort_timingPresorted	1PASS
0.0079594795	jalview.gui.AnnotationChooserTest.testResetOriginalStatejalview.gui.AnnotationChooserTest.testResetOriginalState	1PASS
0.0079594795	jalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcFirstjalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcFirst	1PASS
0.0079594795	jalview.gui.AlignFrameTest.testChangeColour_background_groupsAndThresholdsjalview.gui.AlignFrameTest.testChangeColour_background_groupsAndThresholds	1PASS
0.006512301	jalview.ext.jmol.JmolParserTest.testFileParserjalview.ext.jmol.JmolParserTest.testFileParser	1PASS
0.006512301	jalview.gui.AlignViewportTest.testGetSelectionAsNewSequences_withContactMatricesjalview.gui.AlignViewportTest.testGetSelectionAsNewSequences_withContactMatrices	1PASS
0.006512301	jalview.project.Jalview2xmlTests.testStoreAndRecoverAnnotationRowElementColoursjalview.project.Jalview2xmlTests.testStoreAndRecoverAnnotationRowElementColours	1PASS
0.004341534	jalview.bin.CommandsTest.commandsOpenTestjalview.bin.CommandsTest.commandsOpenTest	1PASS
0.004341534	jalview.io.JSONFileTest.testBioJSONRoundTripWithColourSchemeNonejalview.io.JSONFileTest.testBioJSONRoundTripWithColourSchemeNone	1PASS
0.004341534	jalview.io.JSONFileTest.testGrpParsed_colourNonejalview.io.JSONFileTest.testGrpParsed_colourNone	1PASS
0.004341534	jalview.bin.CommandsTest.commandsOpenTestjalview.bin.CommandsTest.commandsOpenTest	1PASS
0.004341534	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noSequenceSelectedjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noSequenceSelected	1PASS
0.004341534	jalview.project.Jalview2xmlTests.gatherViewsHerejalview.project.Jalview2xmlTests.gatherViewsHere	1PASS
0.004341534	jalview.project.Jalview2xmlTests.testStoreAndRecoverReferenceSeqSettingsjalview.project.Jalview2xmlTests.testStoreAndRecoverReferenceSeqSettings	1PASS
0.004341534	jalview.project.Jalview2xmlTests.testAutoShowOverviewForLegacyProjectsjalview.project.Jalview2xmlTests.testAutoShowOverviewForLegacyProjects	1PASS
0.004341534	jalview.analysis.GroupingTest.testMakeGroupsWithBothjalview.analysis.GroupingTest.testMakeGroupsWithBoth	1PASS
0.004341534	jalview.io.AnnotationFileIOTest.exampleAnnotationFileIOjalview.io.AnnotationFileIOTest.exampleAnnotationFileIO	1PASS
0.004341534	jalview.project.Jalview2xmlTests.noDuplicatePdbMappingsMadejalview.project.Jalview2xmlTests.noDuplicatePdbMappingsMade	1PASS
0.004341534	jalview.bin.CommandsTest.commandsOpenTestjalview.bin.CommandsTest.commandsOpenTest	1PASS
0.004341534	jalview.project.Jalview2xmlTests.viewRefPdbAnnotationjalview.project.Jalview2xmlTests.viewRefPdbAnnotation	1PASS
0.004341534	jalview.project.Jalview2xmlTests.testStoreAndRecoverExpandedviewsjalview.project.Jalview2xmlTests.testStoreAndRecoverExpandedviews	1PASS
0.004341534	jalview.project.Jalview2xmlTests.testStoreAndRestoreIDwidthAndAnnotationHeightjalview.project.Jalview2xmlTests.testStoreAndRestoreIDwidthAndAnnotationHeight	1PASS
0.004341534	jalview.io.AnnotationFileIOTest.testAnnotateAlignmentViewjalview.io.AnnotationFileIOTest.testAnnotateAlignmentView	1PASS

Source view

* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)

* Copyright (C) $$Year-Rel$$ The Jalview Authors

* This file is part of Jalview.

* Jalview is free software: you can redistribute it and/or

* modify it under the terms of the GNU General Public License

* as published by the Free Software Foundation, either version 3

* of the License, or (at your option) any later version.

* Jalview is distributed in the hope that it will be useful, but

* WITHOUT ANY WARRANTY; without even the implied warranty

* of MERCHANTABILITY or FITNESS FOR A PARTICULAR

* PURPOSE. See the GNU General Public License for more details.

* You should have received a copy of the GNU General Public License

* along with Jalview. If not, see <http://www.gnu.org/licenses/>.

* The Jalview Authors are detailed in the 'AUTHORS' file.

package jalview.analysis;

import java.awt.Color;

import java.util.ArrayList;

import java.util.Arrays;

import java.util.Collection;

import java.util.Collections;

import java.util.HashMap;

import java.util.HashSet;

import java.util.Iterator;

import java.util.LinkedHashMap;

import java.util.List;

import java.util.Locale;

import java.util.Map;

import java.util.Map.Entry;

import java.util.NoSuchElementException;

import java.util.Set;

import java.util.SortedMap;

import java.util.TreeMap;

import java.util.Vector;

import jalview.api.AlignCalcWorkerI;

import jalview.bin.Console;

import jalview.commands.RemoveGapColCommand;

import jalview.datamodel.AlignedCodon;

import jalview.datamodel.AlignedCodonFrame;

import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;

import jalview.datamodel.Alignment;

import jalview.datamodel.AlignmentAnnotation;

import jalview.datamodel.AlignmentI;

import jalview.datamodel.Annotation;

import jalview.datamodel.ContactMatrixI;

import jalview.datamodel.DBRefEntry;

import jalview.datamodel.GeneLociI;

import jalview.datamodel.IncompleteCodonException;

import jalview.datamodel.Mapping;

import jalview.datamodel.PDBEntry;

import jalview.datamodel.SeqCigar;

import jalview.datamodel.Sequence;

import jalview.datamodel.SequenceFeature;

import jalview.datamodel.SequenceGroup;

import jalview.datamodel.SequenceI;

import jalview.datamodel.features.SequenceFeatures;

import jalview.gui.AlignmentPanel;

import jalview.io.gff.SequenceOntologyI;

import jalview.schemes.ResidueProperties;

import jalview.util.Comparison;

import jalview.util.Constants;

import jalview.util.DBRefUtils;

import jalview.util.IntRangeComparator;

import jalview.util.MapList;

import jalview.util.MappingUtils;

import jalview.workers.SecondaryStructureConsensusThread;

/**
 * Grab bag of useful alignment manipulation operations. Expect these to be
 * refactored elsewhere at some point.
 *
 * @author jimp
 */

public class AlignmentUtils

{

// number of nucleotide bases that make up one codon
private static final int CODON_LENGTH = 3;

// "sequence_variant:" text prefix; its use is outside this part of the file
private static final String SEQUENCE_VARIANT = "sequence_variant:";

/*
 * the 'id' attribute is provided for variant features fetched from
 * Ensembl using its REST service with JSON format
 */
public static final String VARIANT_ID = "id";

/**

* A data model to hold the 'normal' base value at a position, and an optional

* sequence variant feature

static final class DnaVariant

{

100

final String base;

101

102

SequenceFeature variant;

103

104

DnaVariant(String nuc)

{

base = nuc;

variant = null;

}

DnaVariant(String nuc, SequenceFeature var)

{

base = nuc;

variant = var;

}

public String getSource()

117

{

118

return variant == null ? null : variant.getFeatureGroup();

}

/**

* toString for aid in the debugger only

123

124

@Override

125

public String toString()

126

{

127

return base + ":" + (variant == null ? "" : variant.getDescription());

}

}

/**

* given an existing alignment, create a new alignment including all, or up to

133

* flankSize additional symbols from each sequence's dataset sequence

* @param core

* @param flankSize

* @return AlignmentI

public static AlignmentI expandContext(AlignmentI core, int flankSize)

140

{

141

List<SequenceI> sq = new ArrayList<>();

142

int maxoffset = 0;

143

for (SequenceI s : core.getSequences())

144

{

145

131

SequenceI newSeq = s.deriveSequence();

146

131

final int newSeqStart = newSeq.getStart() - 1;

147

131

if (newSeqStart > maxoffset

148

&& newSeq.getDatasetSequence().getStart() < s.getStart())

149

{

150

131

maxoffset = newSeqStart;

151

}

152

131

sq.add(newSeq);

}

if (flankSize > -1)

{

maxoffset = Math.min(maxoffset, flankSize);

}

* now add offset left and right to create an expanded alignment

161

162

for (SequenceI s : sq)

163

{

164

131

SequenceI ds = s;

165

262

while (ds.getDatasetSequence() != null)

166

{

167

131

ds = ds.getDatasetSequence();

168

}

169

131

int s_end = s.findPosition(s.getStart() + s.getLength());

170

// find available flanking residues for sequence

171

131

int ustream_ds = s.getStart() - ds.getStart();

172

131

int dstream_ds = ds.getEnd() - s_end;

173

174

// build new flanked sequence

175

176

// compute gap padding to start of flanking sequence

177

131

int offset = maxoffset - ustream_ds;

178

179

// padding is gapChar x ( maxoffset - min(ustream_ds, flank)

180

131

if (flankSize >= 0)

181

{

182

125

if (flankSize < ustream_ds)

183

{

184

// take up to flankSize residues

185

offset = maxoffset - flankSize;

186

ustream_ds = flankSize;

187

}

188

125

if (flankSize <= dstream_ds)

189

{

190

116

dstream_ds = flankSize - 1;

191

}

192

}

193

// TODO use Character.toLowerCase to avoid creating String objects?

194

131

char[] upstream = new String(ds

195

.getSequence(s.getStart() - 1 - ustream_ds, s.getStart() - 1))

196

.toLowerCase(Locale.ROOT).toCharArray();

197

131

char[] downstream = new String(

198

ds.getSequence(s_end - 1, s_end + dstream_ds))

199

.toLowerCase(Locale.ROOT).toCharArray();

200

131

char[] coreseq = s.getSequence();

201

131

char[] nseq = new char[offset + upstream.length + downstream.length

202

+ coreseq.length];

203

131

char c = core.getGapCharacter();

204

205

131

int p = 0;

206

461

for (; p < offset; p++)

207

{

208

330

nseq[p] = c;

209

}

210

211

131

System.arraycopy(upstream, 0, nseq, p, upstream.length);

212

131

System.arraycopy(coreseq, 0, nseq, p + upstream.length,

213

coreseq.length);

214

131

System.arraycopy(downstream, 0, nseq,

215

p + coreseq.length + upstream.length, downstream.length);

216

131

s.setSequence(new String(nseq));

217

131

s.setStart(s.getStart() - ustream_ds);

218

131

s.setEnd(s_end + downstream.length);

219

}

220

AlignmentI newAl = new jalview.datamodel.Alignment(

221

sq.toArray(new SequenceI[0]));

222

for (SequenceI s : sq)

223

{

224

131

if (s.getAnnotation() != null)

225

{

226

for (AlignmentAnnotation aa : s.getAnnotation())

227

{

228

aa.adjustForAlignment(); // JAL-1712 fix

229

newAl.addAnnotation(aa);

}

}

}

newAl.setDataset(core.getDataset());

return newAl;

}

/**

* Returns the index (zero-based position) of a sequence in an alignment, or

* -1 if not found.

* @param al

* @param seq

* @return

58449

public static int getSequenceIndex(AlignmentI al, SequenceI seq)

246

{

247

58449

int result = -1;

248

58449

int pos = 0;

249

58449

for (SequenceI alSeq : al.getSequences())

250

{

251

125948980

if (alSeq == seq)

252

{

253

58410

result = pos;

254

58410

break;

255

}

256

125890570

pos++;

257

}

258

58449

return result;

}

/**

* Returns a map of lists of sequences in the alignment, keyed by sequence

263

* name. For use in mapping between different alignment views of the same

264

* sequences.

265

266

* @see jalview.datamodel.AlignmentI#getSequencesByName()

267

268

public static Map<String, List<SequenceI>> getSequencesByName(

269

AlignmentI al)

270

{

271

Map<String, List<SequenceI>> theMap = new LinkedHashMap<>();

272

for (SequenceI seq : al.getSequences())

273

{

274

String name = seq.getName();

275

if (name != null)

276

{

277

List<SequenceI> seqs = theMap.get(name);

278

if (seqs == null)

279

{

280

seqs = new ArrayList<>();

281

theMap.put(name, seqs);

}

seqs.add(seq);

}

}

return theMap;

}

/**

* Build mapping of protein to cDNA alignment. Mappings are made between

291

* sequences where the cDNA translates to the protein sequence. Any new

292

* mappings are added to the protein alignment. Returns true if any mappings

293

* either already exist or were added, else false.

294

295

* @param proteinAlignment

296

* @param cdnaAlignment

297

* @return

298

299

public static boolean mapProteinAlignmentToCdna(

300

final AlignmentI proteinAlignment, final AlignmentI cdnaAlignment)

301

{

302

if (proteinAlignment == null || cdnaAlignment == null)

{

return false;

}

Set<SequenceI> mappedDna = new HashSet<>();

308

Set<SequenceI> mappedProtein = new HashSet<>();

309

310

311

* First pass - map sequences where cross-references exist. This include

312

* 1-to-many mappings to support, for example, variant cDNA.

313

314

boolean mappingPerformed = mapProteinToCdna(proteinAlignment,

315

cdnaAlignment, mappedDna, mappedProtein, true);

316

317

318

* Second pass - map sequences where no cross-references exist. This only

319

* does 1-to-1 mappings and assumes corresponding sequences are in the same

320

* order in the alignments.

321

322

mappingPerformed |= mapProteinToCdna(proteinAlignment, cdnaAlignment,

323

mappedDna, mappedProtein, false);

324

return mappingPerformed;

}

/**

* Make mappings between compatible sequences (where the cDNA translation

329

* matches the protein).

330

331

* @param proteinAlignment

332

* @param cdnaAlignment

333

* @param mappedDna

334

* a set of mapped DNA sequences (to add to)

335

* @param mappedProtein

336

* a set of mapped Protein sequences (to add to)

337

* @param xrefsOnly

338

* if true, only map sequences where xrefs exist

339

* @return

340

341

protected static boolean mapProteinToCdna(

342

final AlignmentI proteinAlignment, final AlignmentI cdnaAlignment,

343

Set<SequenceI> mappedDna, Set<SequenceI> mappedProtein,

344

boolean xrefsOnly)

345

{

346

boolean mappingExistsOrAdded = false;

347

List<SequenceI> thisSeqs = proteinAlignment.getSequences();

348

for (SequenceI aaSeq : thisSeqs)

349

{

350

boolean proteinMapped = false;

351

AlignedCodonFrame acf = new AlignedCodonFrame();

352

353

for (SequenceI cdnaSeq : cdnaAlignment.getSequences())

354

{

355

356

* Always try to map if sequences have xref to each other; this supports

357

* variant cDNA or alternative splicing for a protein sequence.

358

359

* If no xrefs, try to map progressively, assuming that alignments have

360

* mappable sequences in corresponding order. These are not

361

* many-to-many, as that would risk mixing species with similar cDNA

362

* sequences.

363

364

if (xrefsOnly && !AlignmentUtils.haveCrossRef(aaSeq, cdnaSeq))

{

continue;

}

* Don't map non-xrefd sequences more than once each. This heuristic

371

* allows us to pair up similar sequences in ordered alignments.

372

373

if (!xrefsOnly && (mappedProtein.contains(aaSeq)

374

|| mappedDna.contains(cdnaSeq)))

{

continue;

}

if (mappingExists(proteinAlignment.getCodonFrames(),

379

aaSeq.getDatasetSequence(), cdnaSeq.getDatasetSequence()))

380

{

381

mappingExistsOrAdded = true;

}

else

{

MapList map = mapCdnaToProtein(aaSeq, cdnaSeq);

386

if (map != null)

387

{

388

acf.addMap(cdnaSeq, aaSeq, map);

389

mappingExistsOrAdded = true;

390

proteinMapped = true;

391

mappedDna.add(cdnaSeq);

392

mappedProtein.add(aaSeq);

}

}

}

if (proteinMapped)

{

proteinAlignment.addCodonFrame(acf);

399

}

400

}

401

return mappingExistsOrAdded;

}

/**

* Answers true if the mappings include one between the given (dataset)

406

* sequences.

407

408

protected static boolean mappingExists(List<AlignedCodonFrame> mappings,

409

SequenceI aaSeq, SequenceI cdnaSeq)

410

{

411

if (mappings != null)

412

{

413

for (AlignedCodonFrame acf : mappings)

414

{

415

if (cdnaSeq == acf.getDnaForAaSeq(aaSeq))

{

return true;

}

}

}

return false;

}

/**

* Builds a mapping (if possible) of a cDNA to a protein sequence.

426

* <ul>

427

* <li>first checks if the cdna translates exactly to the protein

428

* sequence</li>

429

* <li>else checks for translation after removing a STOP codon</li>

430

* <li>else checks for translation after removing a START codon</li>

431

* <li>if that fails, inspect CDS features on the cDNA sequence</li>

432

* </ul>

433

* Returns null if no mapping is determined.

434

435

* @param proteinSeq

436

* the aligned protein sequence

437

* @param cdnaSeq

438

* the aligned cdna sequence

439

* @return

440

441

public static MapList mapCdnaToProtein(SequenceI proteinSeq,

SequenceI cdnaSeq)

{

* Here we handle either dataset sequence set (desktop) or absent (applet).

446

* Use only the char[] form of the sequence to avoid creating possibly large

447

* String objects.

448

449

final SequenceI proteinDataset = proteinSeq.getDatasetSequence();

450

char[] aaSeqChars = proteinDataset != null

451

? proteinDataset.getSequence()

452

: proteinSeq.getSequence();

453

final SequenceI cdnaDataset = cdnaSeq.getDatasetSequence();

454

char[] cdnaSeqChars = cdnaDataset != null ? cdnaDataset.getSequence()

455

: cdnaSeq.getSequence();

456

if (aaSeqChars == null || cdnaSeqChars == null)

{

return null;

}

* cdnaStart/End, proteinStartEnd are base 1 (for dataset sequence mapping)

463

464

final int mappedLength = CODON_LENGTH * aaSeqChars.length;

465

int cdnaLength = cdnaSeqChars.length;

466

int cdnaStart = cdnaSeq.getStart();

467

int cdnaEnd = cdnaSeq.getEnd();

468

final int proteinStart = proteinSeq.getStart();

469

final int proteinEnd = proteinSeq.getEnd();

470

471

472

* If lengths don't match, try ignoring stop codon (if present)

473

474

if (cdnaLength != mappedLength && cdnaLength > 2)

475

{

476

String lastCodon = String.valueOf(cdnaSeqChars,

477

cdnaLength - CODON_LENGTH, CODON_LENGTH)

478

.toUpperCase(Locale.ROOT);

479

for (String stop : ResidueProperties.STOP_CODONS)

480

{

481

if (lastCodon.equals(stop))

482

{

483

cdnaEnd -= CODON_LENGTH;

484

cdnaLength -= CODON_LENGTH;

break;

}

}

}

* If lengths still don't match, try ignoring start codon.

492

493

int startOffset = 0;

494

if (cdnaLength != mappedLength && cdnaLength > 2

495

&& String.valueOf(cdnaSeqChars, 0, CODON_LENGTH)

496

.toUpperCase(Locale.ROOT)

497

.equals(ResidueProperties.START))

498

{

499

startOffset += CODON_LENGTH;

500

cdnaStart += CODON_LENGTH;

501

cdnaLength -= CODON_LENGTH;

502

}

503

504

if (translatesAs(cdnaSeqChars, startOffset, aaSeqChars))

505

{

506

507

* protein is translation of dna (+/- start/stop codons)

508

509

MapList map = new MapList(new int[] { cdnaStart, cdnaEnd },

510

new int[]

511

{ proteinStart, proteinEnd }, CODON_LENGTH, 1);

return map;

}

* translation failed - try mapping CDS annotated regions of dna

517

518

return mapCdsToProtein(cdnaSeq, proteinSeq);

}

/**

* Test whether the given cdna sequence, starting at the given offset,

523

* translates to the given amino acid sequence, using the standard translation

524

* table. Designed to fail fast i.e. as soon as a mismatch position is found.

525

526

* @param cdnaSeqChars

* @param cdnaStart

* @param aaSeqChars

* @return

protected static boolean translatesAs(char[] cdnaSeqChars, int cdnaStart,

532

char[] aaSeqChars)

533

{

534

if (cdnaSeqChars == null || aaSeqChars == null)

{

return false;

}

int aaPos = 0;

int dnaPos = cdnaStart;

541

161

for (; dnaPos < cdnaSeqChars.length - 2

542

&& aaPos < aaSeqChars.length; dnaPos += CODON_LENGTH, aaPos++)

543

{

544

130

String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);

545

130

final String translated = ResidueProperties.codonTranslate(codon);

546

547

548

* allow * in protein to match untranslatable in dna

549

550

130

final char aaRes = aaSeqChars[aaPos];

551

130

if ((translated == null || ResidueProperties.STOP.equals(translated))

&& aaRes == '*')

{

continue;

}

126

if (translated == null || !(aaRes == translated.charAt(0)))

557

{

558

// debug

559

// jalview.bin.Console.outPrintln(("Mismatch at " + i + "/" + aaResidue

560

// + ": "

561

// + codon + "(" + translated + ") != " + aaRes));

return false;

}

}

* check we matched all of the protein sequence

568

569

if (aaPos != aaSeqChars.length)

{

return false;

}

* check we matched all of the dna except

576

* for optional trailing STOP codon

577

578

if (dnaPos == cdnaSeqChars.length)

{

return true;

}

if (dnaPos == cdnaSeqChars.length - CODON_LENGTH)

583

{

584

String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);

585

if (ResidueProperties.STOP

586

.equals(ResidueProperties.codonTranslate(codon)))

{

return true;

}

}

return false;

}

/**

* Align sequence 'seq' to match the alignment of a mapped sequence. Note this

596

* currently assumes that we are aligning cDNA to match protein.

597

598

* @param seq

599

* the sequence to be realigned

600

* @param al

601

* the alignment whose sequence alignment is to be 'copied'

602

* @param gap

603

* character string represent a gap in the realigned sequence

604

* @param preserveUnmappedGaps

605

* @param preserveMappedGaps

606

* @return true if the sequence was realigned, false if it could not be

607

608

public static boolean alignSequenceAs(SequenceI seq, AlignmentI al,

609

String gap, boolean preserveMappedGaps,

610

boolean preserveUnmappedGaps)

611

{

612

613

* Get any mappings from the source alignment to the target (dataset)

614

* sequence.

615

616

// TODO there may be one AlignedCodonFrame per dataset sequence, or one with

617

// all mappings. Would it help to constrain this?

618

List<AlignedCodonFrame> mappings = al.getCodonFrame(seq);

619

if (mappings == null || mappings.isEmpty())

{

return false;

}

* Locate the aligned source sequence whose dataset sequence is mapped. We

626

* just take the first match here (as we can't align like more than one

627

* sequence).

628

629

SequenceI alignFrom = null;

630

AlignedCodonFrame mapping = null;

631

for (AlignedCodonFrame mp : mappings)

632

{

633

alignFrom = mp.findAlignedSequence(seq, al);

634

if (alignFrom != null)

{

mapping = mp;

break;

}

}

if (alignFrom == null)

{

return false;

}

alignSequenceAs(seq, alignFrom, mapping, gap, al.getGapCharacter(),

646

preserveMappedGaps, preserveUnmappedGaps);

return true;

}

/**

* Align sequence 'alignTo' the same way as 'alignFrom', using the mapping to

652

* match residues and codons. Flags control whether existing gaps in unmapped

653

* (intron) and mapped (exon) regions are preserved or not. Gaps between

654

* intron and exon are only retained if both flags are set.

* @param alignTo

* @param alignFrom

* @param mapping

* @param myGap

* @param sourceGap

* @param preserveUnmappedGaps

662

* @param preserveMappedGaps

663

664

public static void alignSequenceAs(SequenceI alignTo, SequenceI alignFrom,

665

AlignedCodonFrame mapping, String myGap, char sourceGap,

666

boolean preserveMappedGaps, boolean preserveUnmappedGaps)

667

{

668

// TODO generalise to work for Protein-Protein, dna-dna, dna-protein

669

670

// aligned and dataset sequence positions, all base zero

int thisSeqPos = 0;

int sourceDsPos = 0;

int basesWritten = 0;

675

char myGapChar = myGap.charAt(0);

676

int ratio = myGap.length();

677

678

int fromOffset = alignFrom.getStart() - 1;

679

int toOffset = alignTo.getStart() - 1;

680

int sourceGapMappedLength = 0;

681

boolean inExon = false;

682

final int toLength = alignTo.getLength();

683

final int fromLength = alignFrom.getLength();

684

StringBuilder thisAligned = new StringBuilder(2 * toLength);

685

686

687

* Traverse the 'model' aligned sequence

688

689

205

for (int i = 0; i < fromLength; i++)

690

{

691

186

char sourceChar = alignFrom.getCharAt(i);

692

186

if (sourceChar == sourceGap)

693

{

694

sourceGapMappedLength += ratio;

continue;

}

* Found a non-gap character. Locate its mapped region if any.

700

701

142

sourceDsPos++;

702

// Note mapping positions are base 1, our sequence positions base 0

703

142

int[] mappedPos = mapping.getMappedRegion(alignTo, alignFrom,

704

sourceDsPos + fromOffset);

705

142

if (mappedPos == null)

706

{

707

708

* unmapped position; treat like a gap

709

710

sourceGapMappedLength += ratio;

711

// jalview.bin.Console.errPrintln("Can't align: no codon mapping to

712

// residue "

713

// + sourceDsPos + "(" + sourceChar + ")");

// return;

continue;

}

int mappedCodonStart = mappedPos[0]; // position (1...) of codon start

719

int mappedCodonEnd = mappedPos[mappedPos.length - 1]; // codon end pos

720

StringBuilder trailingCopiedGap = new StringBuilder();

721

722

723

* Copy dna sequence up to and including this codon. Optionally, include

724

* gaps before the codon starts (in introns) and/or after the codon starts

725

* (in exons).

726

727

* Note this only works for 'linear' splicing, not reverse or interleaved.

728

* But then 'align dna as protein' doesn't make much sense otherwise.

729

730

int intronLength = 0;

731

294

while (basesWritten + toOffset < mappedCodonEnd

732

&& thisSeqPos < toLength)

733

{

734

246

final char c = alignTo.getCharAt(thisSeqPos++);

735

246

if (c != myGapChar)

736

{

737

146

basesWritten++;

738

146

int sourcePosition = basesWritten + toOffset;

739

146

if (sourcePosition < mappedCodonStart)

740

{

741

742

* Found an unmapped (intron) base. First add in any preceding gaps

743

* (if wanted).

744

745

if (preserveUnmappedGaps && trailingCopiedGap.length() > 0)

746

{

747

thisAligned.append(trailingCopiedGap.toString());

748

intronLength += trailingCopiedGap.length();

749

trailingCopiedGap = new StringBuilder();

}

intronLength++;

inExon = false;

}

else

{

final boolean startOfCodon = sourcePosition == mappedCodonStart;

757

int gapsToAdd = calculateGapsToInsert(preserveMappedGaps,

758

preserveUnmappedGaps, sourceGapMappedLength, inExon,

759

trailingCopiedGap.length(), intronLength, startOfCodon);

760

215

for (int k = 0; k < gapsToAdd; k++)

761

{

762

117

thisAligned.append(myGapChar);

763

}

764

sourceGapMappedLength = 0;

765

inExon = true;

766

}

767

146

thisAligned.append(c);

768

146

trailingCopiedGap = new StringBuilder();

}

else

{

100

if (inExon && preserveMappedGaps)

773

{

774

trailingCopiedGap.append(myGapChar);

775

}

776

else if (!inExon && preserveUnmappedGaps)

777

{

778

trailingCopiedGap.append(myGapChar);

}

}

}

}

* At end of model aligned sequence. Copy any remaining target sequence, optionally

786

* including (intron) gaps.

787

788

129

while (thisSeqPos < toLength)

789

{

790

110

final char c = alignTo.getCharAt(thisSeqPos++);

791

110

if (c != myGapChar || preserveUnmappedGaps)

792

{

793

102

thisAligned.append(c);

794

}

795

110

sourceGapMappedLength--;

}

* finally add gaps to pad for any trailing source gaps or

800

* unmapped characters

801

802

if (preserveUnmappedGaps)

803

{

804

while (sourceGapMappedLength > 0)

805

{

806

thisAligned.append(myGapChar);

807

sourceGapMappedLength--;

}

}

* All done aligning, set the aligned sequence.

813

814

alignTo.setSequence(new String(thisAligned));

}

/**

* Helper method to work out how many gaps to insert when realigning.

819

820

* @param preserveMappedGaps

821

* @param preserveUnmappedGaps

822

* @param sourceGapMappedLength

823

* @param inExon

824

* @param trailingCopiedGap

825

* @param intronLength

826

* @param startOfCodon

827

* @return

828

829

protected static int calculateGapsToInsert(boolean preserveMappedGaps,

830

boolean preserveUnmappedGaps, int sourceGapMappedLength,

831

boolean inExon, int trailingGapLength, int intronLength,

832

final boolean startOfCodon)

{

int gapsToAdd = 0;

if (startOfCodon)

{

* Reached start of codon. Ignore trailing gaps in intron unless we are

839

* preserving gaps in both exon and intron. Ignore them anyway if the

840

* protein alignment introduces a gap at least as large as the intronic

841

* region.

842

843

if (inExon && !preserveMappedGaps)

844

{

845

trailingGapLength = 0;

846

}

847

if (!inExon && !(preserveMappedGaps && preserveUnmappedGaps))

848

{

849

trailingGapLength = 0;

}

if (inExon)

{

gapsToAdd = Math.max(sourceGapMappedLength, trailingGapLength);

}

else

{

if (intronLength + trailingGapLength <= sourceGapMappedLength)

858

{

859

gapsToAdd = sourceGapMappedLength - intronLength;

}

else

{

gapsToAdd = Math.min(

864

intronLength + trailingGapLength - sourceGapMappedLength,

trailingGapLength);

}

}

}

else

{

* second or third base of codon; check for any gaps in dna

873

874

if (!preserveMappedGaps)

875

{

876

trailingGapLength = 0;

877

}

878

gapsToAdd = Math.max(sourceGapMappedLength, trailingGapLength);

}

return gapsToAdd;

}

/**

* Realigns the given protein to match the alignment of the dna, using codon

885

* mappings to translate aligned codon positions to protein residues.

886

887

* @param protein

888

* the alignment whose sequences are realigned by this method

889

* @param dna

890

* the dna alignment whose alignment we are 'copying'

891

* @return the number of sequences that were realigned

892

893

public static int alignProteinAsDna(AlignmentI protein, AlignmentI dna)

894

{

895

if (protein.isNucleotide() || !dna.isNucleotide())

896

{

897

jalview.bin.Console

898

.errPrintln("Wrong alignment type in alignProteinAsDna");

899

return 0;

900

}

901

List<SequenceI> unmappedProtein = new ArrayList<>();

902

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons = buildCodonColumnsMap(

903

protein, dna, unmappedProtein);

904

return alignProteinAs(protein, alignedCodons, unmappedProtein);

}

/**

* Realigns the given dna to match the alignment of the protein, using codon

909

* mappings to translate aligned peptide positions to codons.

910

911

* Always produces a padded CDS alignment.

912

913

* @param dna

914

* the alignment whose sequences are realigned by this method

915

* @param protein

916

* the protein alignment whose alignment we are 'copying'

917

* @return the number of sequences that were realigned

918

919

public static int alignCdsAsProtein(AlignmentI dna, AlignmentI protein)

920

{

921

if (protein.isNucleotide() || !dna.isNucleotide())

922

{

923

jalview.bin.Console

924

.errPrintln("Wrong alignment type in alignProteinAsDna");

925

return 0;

926

}

927

// todo: implement this

928

List<AlignedCodonFrame> mappings = protein.getCodonFrames();

929

int alignedCount = 0;

930

int width = 0; // alignment width for padding CDS

931

for (SequenceI dnaSeq : dna.getSequences())

932

{

933

if (alignCdsSequenceAsProtein(dnaSeq, protein, mappings,

934

dna.getGapCharacter()))

{

alignedCount++;

}

width = Math.max(dnaSeq.getLength(), width);

}

int oldwidth;

int diff;

for (SequenceI dnaSeq : dna.getSequences())

943

{

944

oldwidth = dnaSeq.getLength();

945

diff = width - oldwidth;

946

if (diff > 0)

947

{

948

dnaSeq.insertCharAt(oldwidth, diff, dna.getGapCharacter());

}

}

return alignedCount;

}

/**

* Helper method to align (if possible) the dna sequence to match the

956

* alignment of a mapped protein sequence. This is currently limited to

957

* handling coding sequence only.

* @param cdsSeq

* @param protein

* @param mappings

* @param gapChar

* @return

static boolean alignCdsSequenceAsProtein(SequenceI cdsSeq,

966

AlignmentI protein, List<AlignedCodonFrame> mappings,

967

char gapChar)

968

{

969

SequenceI cdsDss = cdsSeq.getDatasetSequence();

if (cdsDss == null)

{

System.err

.println("alignCdsSequenceAsProtein needs aligned sequence!");

return false;

}

List<AlignedCodonFrame> dnaMappings = MappingUtils

978

.findMappingsForSequence(cdsSeq, mappings);

979

for (AlignedCodonFrame mapping : dnaMappings)

980

{

981

SequenceI peptide = mapping.findAlignedSequence(cdsSeq, protein);

982

if (peptide != null)

983

{

984

final int peptideLength = peptide.getLength();

985

Mapping map = mapping.getMappingBetween(cdsSeq, peptide);

986

if (map != null)

987

{

988

MapList mapList = map.getMap();

989

if (map.getTo() == peptide.getDatasetSequence())

990

{

991

mapList = mapList.getInverse();

992

}

993

final int cdsLength = cdsDss.getLength();

994

int mappedFromLength = MappingUtils

995

.getLength(mapList.getFromRanges());

996

int mappedToLength = MappingUtils

997

.getLength(mapList.getToRanges());

998

boolean addStopCodon = (cdsLength == mappedFromLength

999

* CODON_LENGTH + CODON_LENGTH)

1000

|| (peptide.getDatasetSequence()

1001

.getLength() == mappedFromLength - 1);

1002

if (cdsLength != mappedToLength && !addStopCodon)

1003

{

1004

jalview.bin.Console.errPrintln(String.format(

1005

"Can't align cds as protein (length mismatch %d/%d): %s",

1006

cdsLength, mappedToLength, cdsSeq.getName()));

}

* pre-fill the aligned cds sequence with gaps

1011

1012

char[] alignedCds = new char[peptideLength * CODON_LENGTH

1013

+ (addStopCodon ? CODON_LENGTH : 0)];

1014

Arrays.fill(alignedCds, gapChar);

1015

1016

1017

* walk over the aligned peptide sequence and insert mapped

1018

* codons for residues in the aligned cds sequence

1019

1020

int copiedBases = 0;

1021

int cdsStart = cdsDss.getStart();

1022

int proteinPos = peptide.getStart() - 1;

1023

int cdsCol = 0;

1024

1025

for (int col = 0; col < peptideLength; col++)

1026

{

1027

char residue = peptide.getCharAt(col);

1028

1029

if (Comparison.isGap(residue))

1030

{

1031

cdsCol += CODON_LENGTH;

}

else

{

proteinPos++;

int[] codon = mapList.locateInTo(proteinPos, proteinPos);

1037

if (codon == null)

1038

{

1039

// e.g. incomplete start codon, X in peptide

1040

cdsCol += CODON_LENGTH;

}

else

{

for (int j = codon[0]; j <= codon[1]; j++)

1045

{

1046

char mappedBase = cdsDss.getCharAt(j - cdsStart);

1047

alignedCds[cdsCol++] = mappedBase;

copiedBases++;

}

}

}

}

* append stop codon if not mapped from protein,

1056

* closing it up to the end of the mapped sequence

1057

1058

if (copiedBases == cdsLength - CODON_LENGTH)

1059

{

1060

for (int i = alignedCds.length - 1; i >= 0; i--)

1061

{

1062

if (!Comparison.isGap(alignedCds[i]))

1063

{

1064

cdsCol = i + 1; // gap just after end of sequence

break;

}

}

for (int i = cdsLength - CODON_LENGTH; i < cdsLength; i++)

1069

{

1070

alignedCds[cdsCol++] = cdsDss.getCharAt(i);

1071

}

1072

}

1073

cdsSeq.setSequence(new String(alignedCds));

return true;

}

}

}

return false;

}

/**

* Builds a map whose key is an aligned codon position (3 alignment column

1083

* numbers base 0), and whose value is a map from protein sequence to each

1084

* protein's peptide residue for that codon. The map generates an ordering of

1085

* the codons, and allows us to read off the peptides at each position in

1086

* order to assemble 'aligned' protein sequences.

1087

1088

* @param protein

1089

* the protein alignment

1090

* @param dna

1091

* the coding dna alignment

1092

* @param unmappedProtein

1093

* any unmapped proteins are added to this list

1094

* @return

1095

1096

protected static Map<AlignedCodon, Map<SequenceI, AlignedCodon>> buildCodonColumnsMap(

1097

AlignmentI protein, AlignmentI dna,

1098

List<SequenceI> unmappedProtein)

1099

{

1100

1101

* maintain a list of any proteins with no mappings - these will be

1102

* rendered 'as is' in the protein alignment as we can't align them

1103

1104

unmappedProtein.addAll(protein.getSequences());

1105

1106

List<AlignedCodonFrame> mappings = protein.getCodonFrames();

1107

1108

1109

* Map will hold, for each aligned codon position e.g. [3, 5, 6], a map of

1110

* {dnaSequence, {proteinSequence, codonProduct}} at that position. The

1111

* comparator keeps the codon positions ordered.

1112

1113

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons = new TreeMap<>(

1114

new CodonComparator());

1115

1116

for (SequenceI dnaSeq : dna.getSequences())

1117

{

1118

for (AlignedCodonFrame mapping : mappings)

1119

{

1120

516

SequenceI prot = mapping.findAlignedSequence(dnaSeq, protein);

1121

516

if (prot != null)

1122

{

1123

Mapping seqMap = mapping.getMappingForSequence(dnaSeq);

1124

addCodonPositions(dnaSeq, prot, protein.getGapCharacter(), seqMap,

1125

alignedCodons);

1126

unmappedProtein.remove(prot);

}

}

}

* Finally add any unmapped peptide start residues (e.g. for incomplete

1133

* codons) as if at the codon position before the second residue

1134

1135

// TODO resolve JAL-2022 so this fudge can be removed

1136

int mappedSequenceCount = protein.getHeight() - unmappedProtein.size();

1137

addUnmappedPeptideStarts(alignedCodons, mappedSequenceCount);

1138

1139

return alignedCodons;

}

/**

* Scans for any protein mapped from position 2 (meaning unmapped start

1144

* position e.g. an incomplete codon), and synthesizes a 'codon' for it at the

1145

* preceding position in the alignment

1146

1147

* @param alignedCodons

1148

* the codon-to-peptide map

1149

* @param mappedSequenceCount

1150

* the number of distinct sequences in the map

1151

1152

protected static void addUnmappedPeptideStarts(

1153

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,

1154

int mappedSequenceCount)

1155

{

1156

// TODO delete this ugly hack once JAL-2022 is resolved

1157

// i.e. we can model startPhase > 0 (incomplete start codon)

1158

1159

List<SequenceI> sequencesChecked = new ArrayList<>();

1160

AlignedCodon lastCodon = null;

1161

Map<SequenceI, AlignedCodon> toAdd = new HashMap<>();

1162

1163

for (Entry<AlignedCodon, Map<SequenceI, AlignedCodon>> entry : alignedCodons

1164

.entrySet())

1165

{

1166

1913

for (Entry<SequenceI, AlignedCodon> sequenceCodon : entry.getValue()

1167

.entrySet())

1168

{

1169

10665

SequenceI seq = sequenceCodon.getKey();

1170

10665

if (sequencesChecked.contains(seq))

1171

{

1172

10635

continue;

1173

}

1174

sequencesChecked.add(seq);

1175

AlignedCodon codon = sequenceCodon.getValue();

1176

if (codon.peptideCol > 1)

1177

{

1178

jalview.bin.Console.errPrintln(

1179

"Problem mapping protein with >1 unmapped start positions: "

1180

+ seq.getName());

1181

}

1182

else if (codon.peptideCol == 1)

1183

{

1184

1185

* first position (peptideCol == 0) was unmapped - add it

1186

1187

if (lastCodon != null)

1188

{

1189

AlignedCodon firstPeptide = new AlignedCodon(lastCodon.pos1,

1190

lastCodon.pos2, lastCodon.pos3,

1191

String.valueOf(seq.getCharAt(0)), 0);

1192

toAdd.put(seq, firstPeptide);

}

else

{

* unmapped residue at start of alignment (no prior column) -

1198

* 'insert' at nominal codon [0, 0, 0]

1199

1200

AlignedCodon firstPeptide = new AlignedCodon(0, 0, 0,

1201

String.valueOf(seq.getCharAt(0)), 0);

1202

toAdd.put(seq, firstPeptide);

1203

}

1204

}

1205

if (sequencesChecked.size() == mappedSequenceCount)

1206

{

1207

// no need to check past first mapped position in all sequences

break;

}

}

1913

lastCodon = entry.getKey();

}

* add any new codons safely after iterating over the map

1216

1217

for (Entry<SequenceI, AlignedCodon> startCodon : toAdd.entrySet())

1218

{

1219

addCodonToMap(alignedCodons, startCodon.getValue(),

1220

startCodon.getKey());

}

}

/**

* Update the aligned protein sequences to match the codon alignments given in

* the map.

* @param protein

* @param alignedCodons

1230

* an ordered map of codon positions (columns), with sequence/peptide

1231

* values present in each column

1232

* @param unmappedProtein

1233

* @return

1234

1235

protected static int alignProteinAs(AlignmentI protein,

1236

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,

1237

List<SequenceI> unmappedProtein)

1238

{

1239

1240

* prefill peptide sequences with gaps

1241

1242

int alignedWidth = alignedCodons.size();

1243

char[] gaps = new char[alignedWidth];

1244

Arrays.fill(gaps, protein.getGapCharacter());

1245

Map<SequenceI, char[]> peptides = new HashMap<>();

1246

for (SequenceI seq : protein.getSequences())

1247

{

1248

if (!unmappedProtein.contains(seq))

1249

{

1250

peptides.put(seq, Arrays.copyOf(gaps, gaps.length));

}

}

* Traverse the codons left to right (as defined by CodonComparator)

1256

* and insert peptides in each column where the sequence is mapped.

1257

* This gives a peptide 'alignment' where residues are aligned if their

1258

* corresponding codons occupy the same columns in the cdna alignment.

1259

1260

int column = 0;

1261

for (AlignedCodon codon : alignedCodons.keySet())

1262

{

1263

1914

final Map<SequenceI, AlignedCodon> columnResidues = alignedCodons

1264

.get(codon);

1265

1914

for (Entry<SequenceI, AlignedCodon> entry : columnResidues.entrySet())

1266

{

1267

10682

char residue = entry.getValue().product.charAt(0);

1268

10682

peptides.get(entry.getKey())[column] = residue;

1269

}

1270

1914

column++;

}

* and finally set the constructed sequences

1275

1276

for (Entry<SequenceI, char[]> entry : peptides.entrySet())

1277

{

1278

entry.getKey().setSequence(new String(entry.getValue()));

}

return 0;

}

/**

* Populate the map of aligned codons by traversing the given sequence

1286

* mapping, locating the aligned positions of mapped codons, and adding those

1287

* positions and their translation products to the map.

1288

1289

* @param dna

1290

* the aligned sequence we are mapping from

1291

* @param protein

1292

* the sequence to be aligned to the codons

1293

* @param gapChar

1294

* the gap character in the dna sequence

1295

* @param seqMap

1296

* a mapping to a sequence translation

1297

* @param alignedCodons

1298

* the map we are building up

1299

1300

static void addCodonPositions(SequenceI dna, SequenceI protein,

1301

char gapChar, Mapping seqMap,

1302

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons)

1303

{

1304

Iterator<AlignedCodon> codons = seqMap.getCodonIterator(dna, gapChar);

1305

1306

1307

* add codon positions, and their peptide translations, to the alignment

1308

* map, while remembering the first codon mapped

1309

1310

10716

while (codons.hasNext())

1311

{

1312

10684

try

1313

{

1314

10684

AlignedCodon codon = codons.next();

1315

10684

addCodonToMap(alignedCodons, codon, protein);

1316

} catch (IncompleteCodonException e)

1317

{

1318

// possible incomplete trailing codon - ignore

1319

} catch (NoSuchElementException e)

1320

{

1321

// possibly peptide lacking STOP

}

}

}

/**

* Helper method to add a codon-to-peptide entry to the aligned codons map

1328

1329

* @param alignedCodons

* @param codon

* @param protein

10690

protected static void addCodonToMap(

1334

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,

1335

AlignedCodon codon, SequenceI protein)

1336

{

1337

10690

Map<SequenceI, AlignedCodon> seqProduct = alignedCodons.get(codon);

1338

10690

if (seqProduct == null)

1339

{

1340

1914

seqProduct = new HashMap<>();

1341

1914

alignedCodons.put(codon, seqProduct);

1342

}

1343

10690

seqProduct.put(protein, codon);

}

/**

* Returns true if a cDNA/Protein mapping either exists, or could be made,

1348

* between at least one pair of sequences in the two alignments. Currently,

1349

* the logic is:

1350

* <ul>

1351

* <li>One alignment must be nucleotide, and the other protein</li>

1352

* <li>At least one pair of sequences must be already mapped, or mappable</li>

1353

* <li>Mappable means the nucleotide translation matches the protein

1354

* sequence</li>

1355

* <li>The translation may ignore start and stop codons if present in the

* nucleotide</li>

* </ul>

* @param al1

* @param al2

* @return

public static boolean isMappable(AlignmentI al1, AlignmentI al2)

1364

{

1365

if (al1 == null || al2 == null)

{

return false;

}

* Require one nucleotide and one protein

1372

1373

if (al1.isNucleotide() == al2.isNucleotide())

{

return false;

}

AlignmentI dna = al1.isNucleotide() ? al1 : al2;

1378

AlignmentI protein = dna == al1 ? al2 : al1;

1379

List<AlignedCodonFrame> mappings = protein.getCodonFrames();

1380

for (SequenceI dnaSeq : dna.getSequences())

1381

{

1382

for (SequenceI proteinSeq : protein.getSequences())

1383

{

1384

if (isMappable(dnaSeq, proteinSeq, mappings))

{

return true;

}

}

}

return false;

}

/**

* Returns true if the dna sequence is mapped, or could be mapped, to the

* protein sequence.

* @param dnaSeq

* @param proteinSeq

* @param mappings

* @return

protected static boolean isMappable(SequenceI dnaSeq,

1403

SequenceI proteinSeq, List<AlignedCodonFrame> mappings)

1404

{

1405

if (dnaSeq == null || proteinSeq == null)

{

return false;

}

SequenceI dnaDs = dnaSeq.getDatasetSequence() == null ? dnaSeq

1411

: dnaSeq.getDatasetSequence();

1412

SequenceI proteinDs = proteinSeq.getDatasetSequence() == null

1413

? proteinSeq

1414

: proteinSeq.getDatasetSequence();

1415

1416

for (AlignedCodonFrame mapping : mappings)

1417

{

1418

if (proteinDs == mapping.getAaForDnaSeq(dnaDs))

{

* already mapped

return true;

}

}

* Just try to make a mapping (it is not yet stored), test whether

1429

* successful.

1430

1431

return mapCdnaToProtein(proteinDs, dnaDs) != null;

}

/**

* Finds any reference annotations associated with the sequences in

1436

* sequenceScope, that are not already added to the alignment, and adds them

1437

* to the 'candidates' map. Also populates a lookup table of annotation

1438

* labels, keyed by calcId, for use in constructing tooltips or the like.

1439

1440

* @param sequenceScope

1441

* the sequences to scan for reference annotations

1442

* @param labelForCalcId

1443

* (optional) map to populate with label for calcId

1444

* @param candidates

1445

* map to populate with annotations for sequence

1446

* @param al

1447

* the alignment to check for presence of annotations

1448

1449

public static void findAddableReferenceAnnotations(

1450

List<SequenceI> sequenceScope, Map<String, String> labelForCalcId,

1451

final Map<SequenceI, List<AlignmentAnnotation>> candidates,

1452

AlignmentI al)

1453

{

1454

if (sequenceScope == null)

{

return;

}

* For each sequence in scope, make a list of any annotations on the

1461

* underlying dataset sequence which are not already on the alignment.

1462

1463

* Add to a map of { alignmentSequence, <List of annotations to add> }

1464

1465

for (SequenceI seq : sequenceScope)

1466

{

1467

SequenceI dataset = seq.getDatasetSequence();

if (dataset == null)

{

continue;

}

AlignmentAnnotation[] datasetAnnotations = dataset.getAnnotation();

1473

if (datasetAnnotations == null)

{

continue;

}

final List<AlignmentAnnotation> result = new ArrayList<>();

1478

for (AlignmentAnnotation dsann : datasetAnnotations)

1479

{

1480

1481

* Find matching annotations on the alignment. If none is found, then

1482

* add this annotation to the list of 'addable' annotations for this

1483

* sequence.

1484

1485

155

final Iterable<AlignmentAnnotation> matchedAlignmentAnnotations = al

1486

.findAnnotations(seq, dsann.getCalcId(), dsann.label);

1487

155

boolean found = false;

1488

155

if (matchedAlignmentAnnotations != null)

1489

{

1490

152

for (AlignmentAnnotation matched : matchedAlignmentAnnotations)

1491

{

1492

135

if (dsann.description.equals(matched.description))

{

found = true;

break;

}

}

}

155

if (!found)

1500

{

1501

101

result.add(dsann);

1502

101

if (labelForCalcId != null)

1503

{

1504

labelForCalcId.put(dsann.getCalcId(), dsann.label);

}

}

}

* Save any addable annotations for this sequence

1510

1511

if (!result.isEmpty())

1512

{

1513

candidates.put(seq, result);

}

}

}

/**

* Adds annotations to the top of the alignment annotations, in the same order

1520

* as their related sequences. If you already have an annotation and want to

1521

* add it to a sequence in an alignment use {@code addReferenceAnnotationTo}

1522

1523

* @param annotations

1524

* the annotations to add

1525

* @param alignment

1526

* the alignment to add them to

1527

* @param selectionGroup

1528

* current selection group - may be null, if provided then any added

1529

* annotation will be trimmed to just those columns in the selection

1530

* group

1531

1532

public static void addReferenceAnnotations(

1533

Map<SequenceI, List<AlignmentAnnotation>> annotations,

1534

final AlignmentI alignment, final SequenceGroup selectionGroup)

1535

{

1536

for (SequenceI seq : annotations.keySet())

1537

{

1538

for (AlignmentAnnotation ann : annotations.get(seq))

1539

{

1540

addReferenceAnnotationTo(alignment, seq, ann, selectionGroup);

}

}

}

public static boolean isSSAnnotationPresent(

1546

Map<SequenceI, List<AlignmentAnnotation>> annotations)

1547

{

1548

1549

for (SequenceI seq : annotations.keySet())

1550

{

1551

if (isSecondaryStructurePresent(

1552

annotations.get(seq).toArray(new AlignmentAnnotation[0])))

{

return true;

}

}

return false;

}

/**

* Make a copy of a reference annotation {@code ann} and add it to an

1562

* alignment sequence {@code seq} in {@code alignment}, optionally limited to

1563

* the extent of {@code selectionGroup}

* @param alignment

* @param seq

* @param ann

* @param selectionGroup

1569

* current selection group - may be null, if provided then any added

1570

* annotation will be trimmed to just those columns in the selection

1571

* group

1572

* @return annotation added to {@code seq and {@code alignment}

1573

1574

public static AlignmentAnnotation addReferenceAnnotationTo(

1575

final AlignmentI alignment, final SequenceI seq,

1576

final AlignmentAnnotation ann, final SequenceGroup selectionGroup)

1577

{

1578

AlignmentAnnotation copyAnn = new AlignmentAnnotation(ann);

1579

int startRes = 0;

1580

int endRes = ann.annotations.length;

1581

if (selectionGroup != null)

1582

{

1583

startRes = -1 + Math.min(seq.getEnd(), Math.max(seq.getStart(),

1584

seq.findPosition(selectionGroup.getStartRes())));

1585

endRes = -1 + Math.min(seq.getEnd(),

1586

seq.findPosition(selectionGroup.getEndRes()));

1587

1588

}

1589

copyAnn.restrict(startRes, endRes + 0);

1590

1591

1592

* Add to the sequence (sets copyAnn.datasetSequence), unless the

1593

* original annotation is already on the sequence.

1594

1595

if (!seq.hasAnnotation(ann))

1596

{

1597

ContactMatrixI cm = seq.getDatasetSequence().getContactMatrixFor(ann);

1598

if (cm != null)

1599

{

1600

seq.addContactListFor(copyAnn, cm);

1601

}

1602

seq.addAlignmentAnnotation(copyAnn);

1603

}

1604

// adjust for gaps

1605

copyAnn.adjustForAlignment();

1606

// add to the alignment and set visible

1607

alignment.addAnnotation(copyAnn);

1608

copyAnn.visible = true;

return copyAnn;

}

/**

* Set visibility of alignment annotations of specified types (labels), for

1615

* specified sequences. This supports controls like "Show all secondary

1616

* structure", "Hide all Temp factor", etc.

1617

1618

* @al the alignment to scan for annotations

1619

* @param types

1620

* the types (labels) of annotations to be updated

1621

* @param forSequences

1622

* if not null, only annotations linked to one of these sequences are

1623

* in scope for update; if null, acts on all sequence annotations

1624

* @param anyType

1625

* if this flag is true, 'types' is ignored (label not checked)

1626

* @param doShow

1627

* if true, set visibility on, else set off

1628

1629

public static void showOrHideSequenceAnnotations(AlignmentI al,

1630

Collection<String> types, List<SequenceI> forSequences,

1631

boolean anyType, boolean doShow)

1632

{

1633

AlignmentAnnotation[] anns = al.getAlignmentAnnotation();

1634

if (anns != null)

1635

{

1636

for (AlignmentAnnotation aa : anns)

1637

{

1638

if (anyType || types.contains(aa.label))

1639

{

1640

if ((aa.sequenceRef != null) && (forSequences == null

1641

|| forSequences.contains(aa.sequenceRef)))

{

aa.visible = doShow;

}

}

}

}

}

public static AlignmentAnnotation getFirstSequenceAnnotationOfType(

1651

AlignmentI al, int graphType)

1652

{

1653

AlignmentAnnotation[] anns = al.getAlignmentAnnotation();

1654

if (anns != null)

1655

{

1656

for (AlignmentAnnotation aa : anns)

1657

{

1658

if (aa.sequenceRef != null && aa.graph == graphType)

return aa;

}

}

return null;

}

/**

* Returns true if either sequence has a cross-reference to the other

* @param seq1

* @param seq2

* @return

public static boolean haveCrossRef(SequenceI seq1, SequenceI seq2)

1673

{

1674

// Note: moved here from class CrossRef as the latter class has dependencies

1675

// not availability to the applet's classpath

1676

return hasCrossRef(seq1, seq2) || hasCrossRef(seq2, seq1);

}

/**

* Returns true if seq1 has a cross-reference to seq2. Currently this assumes

1681

* that sequence name is structured as Source|AccessionId.

* @param seq1

* @param seq2

* @return

108

public static boolean hasCrossRef(SequenceI seq1, SequenceI seq2)

1688

{

1689

108

if (seq1 == null || seq2 == null)

{

return false;

}

100

String name = seq2.getName();

1694

100

final List<DBRefEntry> xrefs = seq1.getDBRefs();

1695

100

if (xrefs != null)

1696

{

1697

for (int ix = 0, nx = xrefs.size(); ix < nx; ix++)

1698

{

1699

DBRefEntry xref = xrefs.get(ix);

1700

String xrefName = xref.getSource() + "|" + xref.getAccessionId();

1701

// case-insensitive test, consistent with DBRefEntry.equalRef()

1702

if (xrefName.equalsIgnoreCase(name))

{

return true;

}

}

}

return false;

}

/**

* Constructs an alignment consisting of the mapped (CDS) regions in the given

1713

* nucleotide sequences, and updates mappings to match. The CDS sequences are

1714

* added to the original alignment's dataset, which is shared by the new

1715

* alignment. Mappings from nucleotide to CDS, and from CDS to protein, are

1716

* added to the alignment dataset.

1717

1718

* @param dna

1719

* aligned nucleotide (dna or cds) sequences

1720

* @param dataset

1721

* the alignment dataset the sequences belong to

1722

* @param products

1723

* (optional) to restrict results to CDS that map to specified

1724

* protein products

1725

* @return an alignment whose sequences are the cds-only parts of the dna

1726

* sequences (or null if no mappings are found)

1727

1728

public static AlignmentI makeCdsAlignment(SequenceI[] dna,

1729

AlignmentI dataset, SequenceI[] products)

1730

{

1731

if (dataset == null || dataset.getDataset() != null)

1732

{

1733

throw new IllegalArgumentException(

1734

"IMPLEMENTATION ERROR: dataset.getDataset() must be null!");

1735

}

1736

List<SequenceI> foundSeqs = new ArrayList<>();

1737

List<SequenceI> cdsSeqs = new ArrayList<>();

1738

List<AlignedCodonFrame> mappings = dataset.getCodonFrames();

1739

HashSet<SequenceI> productSeqs = null;

1740

if (products != null)

1741

{

1742

productSeqs = new HashSet<>();

1743

for (SequenceI seq : products)

1744

{

1745

productSeqs.add(seq.getDatasetSequence() == null ? seq

1746

: seq.getDatasetSequence());

}

}

* Construct CDS sequences from mappings on the alignment dataset.

1752

* The logic is:

1753

* - find the protein product(s) mapped to from each dna sequence

1754

* - if the mapping covers the whole dna sequence (give or take start/stop

1755

* codon), take the dna as the CDS sequence

1756

* - else search dataset mappings for a suitable dna sequence, i.e. one

1757

* whose whole sequence is mapped to the protein

1758

* - if no sequence found, construct one from the dna sequence and mapping

1759

* (and add it to dataset so it is found if this is repeated)

1760

1761

for (SequenceI dnaSeq : dna)

1762

{

1763

SequenceI dnaDss = dnaSeq.getDatasetSequence() == null ? dnaSeq

1764

: dnaSeq.getDatasetSequence();

1765

1766

List<AlignedCodonFrame> seqMappings = MappingUtils

1767

.findMappingsForSequence(dnaSeq, mappings);

1768

for (AlignedCodonFrame mapping : seqMappings)

1769

{

1770

List<Mapping> mappingsFromSequence = mapping

1771

.getMappingsFromSequence(dnaSeq);

1772

1773

for (Mapping aMapping : mappingsFromSequence)

1774

{

1775

MapList mapList = aMapping.getMap();

1776

if (mapList.getFromRatio() == 1)

1777

{

1778

1779

* not a dna-to-protein mapping (likely dna-to-cds)

continue;

}

* skip if mapping is not to one of the target set of proteins

1786

1787

SequenceI proteinProduct = aMapping.getTo();

1788

if (productSeqs != null && !productSeqs.contains(proteinProduct))

{

continue;

}

* try to locate the CDS from the dataset mappings;

1795

* guard against duplicate results (for the case that protein has

1796

* dbrefs to both dna and cds sequences)

1797

1798

SequenceI cdsSeq = findCdsForProtein(mappings, dnaSeq,

1799

seqMappings, aMapping);

1800

if (cdsSeq != null)

1801

{

1802

if (!foundSeqs.contains(cdsSeq))

1803

{

1804

foundSeqs.add(cdsSeq);

1805

SequenceI derivedSequence = cdsSeq.deriveSequence();

1806

cdsSeqs.add(derivedSequence);

1807

if (!dataset.getSequences().contains(cdsSeq))

1808

{

1809

dataset.addSequence(cdsSeq);

}

}

continue;

}

* didn't find mapped CDS sequence - construct it and add

1817

* its dataset sequence to the dataset

1818

1819

cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping,

1820

dataset).deriveSequence();

1821

// cdsSeq has a name constructed as CDS|<dbref>

1822

// <dbref> will be either the accession for the coding sequence,

1823

// marked in the /via/ dbref to the protein product accession

1824

// or it will be the original nucleotide accession.

1825

SequenceI cdsSeqDss = cdsSeq.getDatasetSequence();

cdsSeqs.add(cdsSeq);

* build the mapping from CDS to protein

1831

1832

List<int[]> cdsRange = Collections

1833

.singletonList(new int[]

1834

{ cdsSeq.getStart(),

1835

cdsSeq.getLength() + cdsSeq.getStart() - 1 });

1836

MapList cdsToProteinMap = new MapList(cdsRange,

1837

mapList.getToRanges(), mapList.getFromRatio(),

1838

mapList.getToRatio());

1839

1840

if (!dataset.getSequences().contains(cdsSeqDss))

1841

{

1842

1843

* if this sequence is a newly created one, add it to the dataset

1844

* and made a CDS to protein mapping (if sequence already exists,

1845

* CDS-to-protein mapping _is_ the transcript-to-protein mapping)

1846

1847

dataset.addSequence(cdsSeqDss);

1848

AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();

1849

cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,

cdsToProteinMap);

* guard against duplicating the mapping if repeating this action

1854

1855

if (!mappings.contains(cdsToProteinMapping))

1856

{

1857

mappings.add(cdsToProteinMapping);

}

}

propagateDBRefsToCDS(cdsSeqDss, dnaSeq.getDatasetSequence(),

1862

proteinProduct, aMapping);

1863

1864

* add another mapping from original 'from' range to CDS

1865

1866

AlignedCodonFrame dnaToCdsMapping = new AlignedCodonFrame();

1867

final MapList dnaToCdsMap = new MapList(mapList.getFromRanges(),

1868

cdsRange, 1, 1);

1869

dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeqDss,

1870

dnaToCdsMap);

1871

if (!mappings.contains(dnaToCdsMapping))

1872

{

1873

mappings.add(dnaToCdsMapping);

}

* transfer dna chromosomal loci (if known) to the CDS

1878

* sequence (via the mapping)

1879

1880

final MapList cdsToDnaMap = dnaToCdsMap.getInverse();

1881

transferGeneLoci(dnaSeq, cdsToDnaMap, cdsSeq);

1882

1883

1884

* add DBRef with mapping from protein to CDS

1885

* (this enables Get Cross-References from protein alignment)

1886

* This is tricky because we can't have two DBRefs with the

1887

* same source and accession, so need a different accession for

1888

* the CDS from the dna sequence

1889

1890

1891

// specific use case:

1892

// Genomic contig ENSCHR:1, contains coding regions for ENSG01,

1893

// ENSG02, ENSG03, with transcripts and products similarly named.

1894

// cannot add distinct dbrefs mapping location on ENSCHR:1 to ENSG01

1895

1896

// JBPNote: ?? can't actually create an example that demonstrates we

1897

// need to

1898

// synthesize an xref.

1899

1900

List<DBRefEntry> primrefs = dnaDss.getPrimaryDBRefs();

1901

for (int ip = 0, np = primrefs.size(); ip < np; ip++)

1902

{

1903

DBRefEntry primRef = primrefs.get(ip);

1904

1905

* create a cross-reference from CDS to the source sequence's

1906

* primary reference and vice versa

1907

1908

String source = primRef.getSource();

1909

String version = primRef.getVersion();

1910

DBRefEntry cdsCrossRef = new DBRefEntry(source,

1911

source + ":" + version, primRef.getAccessionId());

1912

cdsCrossRef

1913

.setMap(new Mapping(dnaDss, new MapList(cdsToDnaMap)));

1914

cdsSeqDss.addDBRef(cdsCrossRef);

1915

1916

dnaSeq.addDBRef(new DBRefEntry(source, version,

1917

cdsSeq.getName(), new Mapping(cdsSeqDss, dnaToCdsMap)));

1918

// problem here is that the cross-reference is synthesized -

1919

// cdsSeq.getName() may be like 'CDS|dnaaccession' or

1920

// 'CDS|emblcdsacc'

1921

// assuming cds version same as dna ?!?

1922

1923

DBRefEntry proteinToCdsRef = new DBRefEntry(source, version,

1924

cdsSeq.getName());

1925

1926

proteinToCdsRef.setMap(

1927

new Mapping(cdsSeqDss, cdsToProteinMap.getInverse()));

1928

proteinProduct.addDBRef(proteinToCdsRef);

1929

}

1930

1931

* transfer any features on dna that overlap the CDS

1932

1933

transferFeatures(dnaSeq, cdsSeq, dnaToCdsMap, null,

1934

SequenceOntologyI.CDS);

}

}

}

AlignmentI cds = new Alignment(

1940

cdsSeqs.toArray(new SequenceI[cdsSeqs.size()]));

1941

cds.setDataset(dataset);

return cds;

}

/**

* Tries to transfer gene loci (dbref to chromosome positions) from fromSeq to

1948

* toSeq, mediated by the given mapping between the sequences

1949

1950

* @param fromSeq

1951

* @param targetToFrom

* Map

* @param targetSeq

protected static void transferGeneLoci(SequenceI fromSeq,

1956

MapList targetToFrom, SequenceI targetSeq)

1957

{

1958

if (targetSeq.getGeneLoci() != null)

1959

{

1960

// already have - don't override

1961

return;

1962

}

1963

GeneLociI fromLoci = fromSeq.getGeneLoci();

1964

if (fromLoci == null)

{

return;

}

MapList newMap = targetToFrom.traverse(fromLoci.getMapping());

if (newMap != null)

{

targetSeq.setGeneLoci(fromLoci.getSpeciesId(),

1974

fromLoci.getAssemblyId(), fromLoci.getChromosomeId(), newMap);

}

}

/**

* A helper method that finds a CDS sequence in the alignment dataset that is

1980

* mapped to the given protein sequence, and either is, or has a mapping from,

1981

* the given dna sequence.

1982

1983

* @param mappings

1984

* set of all mappings on the dataset

1985

* @param dnaSeq

1986

* a dna (or cds) sequence we are searching from

1987

* @param seqMappings

1988

* the set of mappings involving dnaSeq

1989

* @param aMapping

1990

* a transcript-to-peptide mapping

1991

* @return

1992

1993

static SequenceI findCdsForProtein(List<AlignedCodonFrame> mappings,

1994

SequenceI dnaSeq, List<AlignedCodonFrame> seqMappings,

Mapping aMapping)

{

* TODO a better dna-cds-protein mapping data representation to allow easy

1999

* navigation; until then this clunky looping around lists of mappings

2000

2001

SequenceI seqDss = dnaSeq.getDatasetSequence() == null ? dnaSeq

2002

: dnaSeq.getDatasetSequence();

2003

SequenceI proteinProduct = aMapping.getTo();

2004

2005

2006

* is this mapping from the whole dna sequence (i.e. CDS)?

2007

* allowing for possible stop codon on dna but not peptide

2008

2009

int mappedFromLength = MappingUtils

2010

.getLength(aMapping.getMap().getFromRanges());

2011

int dnaLength = seqDss.getLength();

2012

if (mappedFromLength == dnaLength

2013

|| mappedFromLength == dnaLength - CODON_LENGTH)

2014

{

2015

2016

* if sequence has CDS features, this is a transcript with no UTR

2017

* - do not take this as the CDS sequence! (JAL-2789)

2018

2019

if (seqDss.getFeatures().getFeaturesByOntology(SequenceOntologyI.CDS)

.isEmpty())

{

return seqDss;

}

}

* looks like we found the dna-to-protein mapping; search for the

2028

* corresponding cds-to-protein mapping

2029

2030

List<AlignedCodonFrame> mappingsToPeptide = MappingUtils

2031

.findMappingsForSequence(proteinProduct, mappings);

2032

for (AlignedCodonFrame acf : mappingsToPeptide)

2033

{

2034

for (SequenceToSequenceMapping map : acf.getMappings())

2035

{

2036

276

Mapping mapping = map.getMapping();

2037

276

if (mapping != aMapping

2038

&& mapping.getMap().getFromRatio() == CODON_LENGTH

2039

&& proteinProduct == mapping.getTo()

2040

&& seqDss != map.getFromSeq())

2041

{

2042

mappedFromLength = MappingUtils

2043

.getLength(mapping.getMap().getFromRanges());

2044

if (mappedFromLength == map.getFromSeq().getLength())

2045

{

2046

2047

* found a 3:1 mapping to the protein product which covers

2048

* the whole dna sequence i.e. is from CDS; finally check the CDS

2049

* is mapped from the given dna start sequence

2050

2051

SequenceI cdsSeq = map.getFromSeq();

2052

// todo this test is weak if seqMappings contains multiple mappings;

2053

// we get away with it if transcript:cds relationship is 1:1

2054

List<AlignedCodonFrame> dnaToCdsMaps = MappingUtils

2055

.findMappingsForSequence(cdsSeq, seqMappings);

2056

if (!dnaToCdsMaps.isEmpty())

{

return cdsSeq;

}

}

}

}

}

return null;

}

/**

* Helper method that makes a CDS sequence as defined by the mappings from the

2069

* given sequence i.e. extracts the 'mapped from' ranges (which may be on

2070

* forward or reverse strand).

* @param seq

* @param mapping

* @param dataset

* - existing dataset. We check for sequences that look like the CDS

2076

* we are about to construct, if one exists already, then we will

2077

* just return that one.

2078

* @return CDS sequence (as a dataset sequence)

2079

2080

static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping,

AlignmentI dataset)

{

* construct CDS sequence name as "CDS|" with 'from id' held in the mapping

2085

* if set (e.g. EMBL protein_id), else sequence name appended

2086

2087

String mapFromId = mapping.getMappedFromId();

2088

final String seqId = "CDS|"

2089

+ (mapFromId != null ? mapFromId : seq.getName());

2090

2091

SequenceI newSeq = null;

2092

2093

2094

* construct CDS sequence by splicing mapped from ranges

2095

2096

char[] seqChars = seq.getSequence();

2097

List<int[]> fromRanges = mapping.getMap().getFromRanges();

2098

int cdsWidth = MappingUtils.getLength(fromRanges);

2099

char[] newSeqChars = new char[cdsWidth];

2100

2101

int newPos = 0;

2102

for (int[] range : fromRanges)

2103

{

2104

if (range[0] <= range[1])

2105

{

2106

// forward strand mapping - just copy the range

2107

int length = range[1] - range[0] + 1;

2108

System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos,

length);

newPos += length;

}

else

{

// reverse strand mapping - copy and complement one by one

2115

for (int i = range[0]; i >= range[1]; i--)

2116

{

2117

newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]);

}

}

newSeq = new Sequence(seqId, newSeqChars, 1, newPos);

}

if (dataset != null)

{

SequenceI[] matches = dataset.findSequenceMatch(newSeq.getName());

2127

if (matches != null)

2128

{

2129

boolean matched = false;

2130

for (SequenceI mtch : matches)

2131

{

2132

if (mtch.getStart() != newSeq.getStart())

{

continue;

}

if (mtch.getEnd() != newSeq.getEnd())

{

continue;

}

if (!Arrays.equals(mtch.getSequence(), newSeq.getSequence()))

{

continue;

}

if (!matched)

{

matched = true;

newSeq = mtch;

}

else

{

Console.error(

"JAL-2154 regression: warning - found (and ignored) a duplicate CDS sequence:"

+ mtch.toString());

}

}

}

}

// newSeq.setDescription(mapFromId);

return newSeq;

}

/**

* Adds any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to

* the given mapping.

* @param cdsSeq

* @param contig

* @param proteinProduct

2170

* @param mapping

2171

* @return list of DBRefEntrys added

2172

2173

protected static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,

2174

SequenceI contig, SequenceI proteinProduct, Mapping mapping)

2175

{

2176

2177

// gather direct refs from contig congruent with mapping

2178

List<DBRefEntry> direct = new ArrayList<>();

2179

HashSet<String> directSources = new HashSet<>();

2180

2181

List<DBRefEntry> refs = contig.getDBRefs();

2182

if (refs != null)

2183

{

2184

292

for (int ib = 0, nb = refs.size(); ib < nb; ib++)

2185

{

2186

279

DBRefEntry dbr = refs.get(ib);

2187

279

MapList map;

2188

if (dbr.hasMap() && (map = dbr.getMap().getMap()).isTripletMap())

2189

{

2190

// check if map is the CDS mapping

2191

if (mapping.getMap().equals(map))

2192

{

2193

direct.add(dbr);

2194

directSources.add(dbr.getSource());

}

}

}

}

List<DBRefEntry> onSource = DBRefUtils.selectRefs(

2200

proteinProduct.getDBRefs(),

2201

directSources.toArray(new String[0]));

2202

List<DBRefEntry> propagated = new ArrayList<>();

2203

2204

// and generate appropriate mappings

2205

for (int ic = 0, nc = direct.size(); ic < nc; ic++)

2206

{

2207

DBRefEntry cdsref = direct.get(ic);

2208

Mapping m = cdsref.getMap();

2209

// clone maplist and mapping

2210

MapList cdsposmap = new MapList(

2211

Arrays.asList(new int[][]

2212

{ new int[] { cdsSeq.getStart(), cdsSeq.getEnd() } }),

2213

m.getMap().getToRanges(), 3, 1);

2214

Mapping cdsmap = new Mapping(m.getTo(), m.getMap());

2215

2216

// create dbref

2217

DBRefEntry newref = new DBRefEntry(cdsref.getSource(),

2218

cdsref.getVersion(), cdsref.getAccessionId(),

2219

new Mapping(cdsmap.getTo(), cdsposmap));

2220

2221

// and see if we can map to the protein product for this mapping.

2222

// onSource is the filtered set of accessions on protein that we are

2223

// tranferring, so we assume accession is the same.

2224

if (cdsmap.getTo() == null && onSource != null)

2225

{

2226

List<DBRefEntry> sourceRefs = DBRefUtils.searchRefs(onSource,

2227

cdsref.getAccessionId());

2228

if (sourceRefs != null)

2229

{

2230

for (DBRefEntry srcref : sourceRefs)

2231

{

2232

if (srcref.getSource().equalsIgnoreCase(cdsref.getSource()))

2233

{

2234

// we have found a complementary dbref on the protein product, so

2235

// update mapping's getTo

2236

newref.getMap().setTo(proteinProduct);

}

}

}

}

cdsSeq.addDBRef(newref);

2242

propagated.add(newref);

}

return propagated;

}

/**

* Transfers co-located features on 'fromSeq' to 'toSeq', adjusting the

2249

* feature start/end ranges, optionally omitting specified feature types.

2250

* Returns the number of features copied.

* @param fromSeq

* @param toSeq

* @param mapping

* the mapping from 'fromSeq' to 'toSeq'

2256

* @param select

2257

* if not null, only features of this type are copied (including

2258

* subtypes in the Sequence Ontology)

2259

* @param omitting

2260

2261

protected static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,

2262

MapList mapping, String select, String... omitting)

2263

{

2264

SequenceI copyTo = toSeq;

2265

while (copyTo.getDatasetSequence() != null)

2266

{

2267

copyTo = copyTo.getDatasetSequence();

2268

}

2269

if (fromSeq == copyTo || fromSeq.getDatasetSequence() == copyTo)

2270

{

2271

return 0; // shared dataset sequence

}

* get features, optionally restricted by an ontology term

2276

2277

List<SequenceFeature> sfs = select == null

2278

? fromSeq.getFeatures().getPositionalFeatures()

2279

: fromSeq.getFeatures().getFeaturesByOntology(select);

2280

2281

int count = 0;

2282

for (SequenceFeature sf : sfs)

2283

{

2284

9610

String type = sf.getType();

2285

9610

boolean omit = false;

2286

9610

for (String toOmit : omitting)

2287

{

2288

9603

if (type.equals(toOmit))

2289

{

2290

134

omit = true;

2291

}

2292

}

2293

9610

if (omit)

2294

{

2295

134

continue;

}

* locate the mapped range - null if either start or end is

2300

* not mapped (no partial overlaps are calculated)

2301

2302

9476

int start = sf.getBegin();

2303

9476

int end = sf.getEnd();

2304

9476

int[] mappedTo = mapping.locateInTo(start, end);

2305

2306

* if whole exon range doesn't map, try interpreting it

2307

* as 5' or 3' exon overlapping the CDS range

2308

2309

9476

if (mappedTo == null)

2310

{

2311

4447

mappedTo = mapping.locateInTo(end, end);

2312

4447

if (mappedTo != null)

2313

{

2314

2315

* end of exon is in CDS range - 5' overlap

2316

* to a range from the start of the peptide

mappedTo[0] = 1;

}

}

9476

if (mappedTo == null)

2322

{

2323

4447

mappedTo = mapping.locateInTo(start, start);

2324

4447

if (mappedTo != null)

2325

{

2326

2327

* start of exon is in CDS range - 3' overlap

2328

* to a range up to the end of the peptide

2329

2330

mappedTo[1] = toSeq.getLength();

2331

}

2332

}

2333

9476

if (mappedTo != null)

2334

{

2335

5029

int newBegin = Math.min(mappedTo[0], mappedTo[1]);

2336

5029

int newEnd = Math.max(mappedTo[0], mappedTo[1]);

2337

5029

SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd,

2338

sf.getFeatureGroup(), sf.getScore());

2339

5029

copyTo.addSequenceFeature(copy);

2340

5029

count++;

}

}

return count;

}

/**

* Returns a mapping from dna to protein by inspecting sequence features of

2348

* type "CDS" on the dna. A mapping is constructed if the total CDS feature

2349

* length is 3 times the peptide length (optionally after dropping a trailing

2350

* stop codon). This method does not check whether the CDS nucleotide sequence

2351

* translates to the peptide sequence.

* @param dnaSeq

* @param proteinSeq

* @return

public static MapList mapCdsToProtein(SequenceI dnaSeq,

2358

SequenceI proteinSeq)

2359

{

2360

List<int[]> ranges = findCdsPositions(dnaSeq);

2361

int mappedDnaLength = MappingUtils.getLength(ranges);

2362

2363

2364

* if not a whole number of codons, truncate mapping

2365

2366

int codonRemainder = mappedDnaLength % CODON_LENGTH;

2367

if (codonRemainder > 0)

2368

{

2369

mappedDnaLength -= codonRemainder;

2370

MappingUtils.removeEndPositions(codonRemainder, ranges);

2371

}

2372

2373

int proteinLength = proteinSeq.getLength();

2374

int proteinStart = proteinSeq.getStart();

2375

int proteinEnd = proteinSeq.getEnd();

2376

2377

2378

* incomplete start codon may mean X at start of peptide

2379

* we ignore both for mapping purposes

2380

2381

if (proteinSeq.getCharAt(0) == 'X')

2382

{

2383

// todo JAL-2022 support startPhase > 0

proteinStart++;

proteinLength--;

}

List<int[]> proteinRange = new ArrayList<>();

2388

2389

2390

* dna length should map to protein (or protein plus stop codon)

2391

2392

int codesForResidues = mappedDnaLength / CODON_LENGTH;

2393

if (codesForResidues == (proteinLength + 1))

2394

{

2395

// assuming extra codon is for STOP and not in peptide

2396

// todo: check trailing codon is indeed a STOP codon

2397

codesForResidues--;

2398

mappedDnaLength -= CODON_LENGTH;

2399

MappingUtils.removeEndPositions(CODON_LENGTH, ranges);

2400

}

2401

2402

if (codesForResidues == proteinLength)

2403

{

2404

proteinRange.add(new int[] { proteinStart, proteinEnd });

2405

return new MapList(ranges, proteinRange, CODON_LENGTH, 1);

}

return null;

}

/**

* Returns a list of CDS ranges found (as sequence positions base 1), i.e. of

2412

* [start, end] positions of sequence features of type "CDS" (or a sub-type of

2413

* CDS in the Sequence Ontology). The ranges are sorted into ascending start

2414

* position order, so this method is only valid for linear CDS in the same

2415

* sense as the protein product.

* @param dnaSeq

* @return

protected static List<int[]> findCdsPositions(SequenceI dnaSeq)

2421

{

2422

List<int[]> result = new ArrayList<>();

2423

2424

List<SequenceFeature> sfs = dnaSeq.getFeatures()

2425

.getFeaturesByOntology(SequenceOntologyI.CDS);

if (sfs.isEmpty())

{

return result;

}

SequenceFeatures.sortFeatures(sfs, true);

2431

2432

for (SequenceFeature sf : sfs)

{

int phase = 0;

try

{

String s = sf.getPhase();

2438

if (s != null)

2439

{

2440

phase = Integer.parseInt(s);

2441

}

2442

} catch (NumberFormatException e)

{

// leave as zero

}

* phase > 0 on first codon means 5' incomplete - skip to the start

2448

* of the next codon; example ENST00000496384

2449

2450

int begin = sf.getBegin();

2451

int end = sf.getEnd();

2452

if (result.isEmpty() && phase > 0)

{

begin += phase;

if (begin > end)

{

// shouldn't happen!

System.err

.println("Error: start phase extends beyond start CDS in "

+ dnaSeq.getName());

}

}

result.add(new int[] { begin, end });

}

* Finally sort ranges by start position. This avoids a dependency on

2468

* keeping features in order on the sequence (if they are in order anyway,

2469

* the sort will have almost no work to do). The implicit assumption is CDS

2470

* ranges are assembled in order. Other cases should not use this method,

2471

* but instead construct an explicit mapping for CDS (e.g. EMBL parsing).

2472

2473

Collections.sort(result, IntRangeComparator.ASCENDING);

return result;

}

/**

* Makes an alignment with a copy of the given sequences, adding in any

2479

* non-redundant sequences which are mapped to by the cross-referenced

* sequences.

* @param seqs

* @param xrefs

* @param dataset

* the alignment dataset shared by the new copy

2486

* @return

2487

2488

public static AlignmentI makeCopyAlignment(SequenceI[] seqs,

2489

SequenceI[] xrefs, AlignmentI dataset)

2490

{

2491

AlignmentI copy = new Alignment(new Alignment(seqs));

2492

copy.setDataset(dataset);

2493

boolean isProtein = !copy.isNucleotide();

2494

SequenceIdMatcher matcher = new SequenceIdMatcher(seqs);

2495

if (xrefs != null)

2496

{

2497

// BH 2019.01.25 recoded to remove iterators

2498

2499

for (int ix = 0, nx = xrefs.length; ix < nx; ix++)

2500

{

2501

SequenceI xref = xrefs[ix];

2502

List<DBRefEntry> dbrefs = xref.getDBRefs();

2503

if (dbrefs != null)

2504

{

2505

for (int ir = 0, nir = dbrefs.size(); ir < nir; ir++)

2506

{

2507

DBRefEntry dbref = dbrefs.get(ir);

2508

Mapping map = dbref.getMap();

2509

SequenceI mto;

2510

if (map == null || (mto = map.getTo()) == null

2511

|| mto.isProtein() != isProtein)

{

continue;

}

SequenceI mappedTo = mto;

2516

SequenceI match = matcher.findIdMatch(mappedTo);

2517

if (match == null)

2518

{

2519

matcher.add(mappedTo);

2520

copy.addSequence(mappedTo);

}

}

}

}

}

return copy;

}

/**

* Try to align sequences in 'unaligned' to match the alignment of their

2531

* mapped regions in 'aligned'. For example, could use this to align CDS

2532

* sequences which are mapped to their parent cDNA sequences.

2533

2534

* This method handles 1:1 mappings (dna-to-dna or protein-to-protein). For

2535

* dna-to-protein or protein-to-dna use alternative methods.

2536

2537

* @param unaligned

2538

* sequences to be aligned

2539

* @param aligned

2540

* holds aligned sequences and their mappings

2541

* @return

2542

2543

public static int alignAs(AlignmentI unaligned, AlignmentI aligned)

2544

{

2545

2546

* easy case - aligning a copy of aligned sequences

2547

2548

if (alignAsSameSequences(unaligned, aligned))

2549

{

2550

return unaligned.getHeight();

}

* fancy case - aligning via mappings between sequences

2555

2556

List<SequenceI> unmapped = new ArrayList<>();

2557

Map<Integer, Map<SequenceI, Character>> columnMap = buildMappedColumnsMap(

2558

unaligned, aligned, unmapped);

2559

int width = columnMap.size();

2560

char gap = unaligned.getGapCharacter();

2561

int realignedCount = 0;

2562

// TODO: verify this loop scales sensibly for very wide/high alignments

2563

2564

for (SequenceI seq : unaligned.getSequences())

2565

{

2566

if (!unmapped.contains(seq))

2567

{

2568

char[] newSeq = new char[width];

2569

Arrays.fill(newSeq, gap); // JBPComment - doubt this is faster than the

2570

// Integer iteration below

int newCol = 0;

int lastCol = 0;

* traverse the map to find columns populated

2576

* by our sequence

2577

2578

for (Integer column : columnMap.keySet())

2579

{

2580

58976

Character c = columnMap.get(column).get(seq);

2581

58976

if (c != null)

2582

{

2583

2584

* sequence has a character at this position

2585

2586

2587

31986

newSeq[newCol] = c;

2588

31986

lastCol = newCol;

2589

}

2590

58976

newCol++;

}

* trim trailing gaps

if (lastCol < width)

{

char[] tmp = new char[lastCol + 1];

2599

System.arraycopy(newSeq, 0, tmp, 0, lastCol + 1);

2600

newSeq = tmp;

2601

}

2602

// TODO: optimise SequenceI to avoid char[]->String->char[]

2603

seq.setSequence(String.valueOf(newSeq));

realignedCount++;

}

}

return realignedCount;

}

/**

* If unaligned and aligned sequences share the same dataset sequences, then

2612

* simply copies the aligned sequences to the unaligned sequences and returns

2613

* true; else returns false

2614

2615

* @param unaligned

2616

* - sequences to be aligned based on aligned

2617

* @param aligned

2618

* - 'guide' alignment containing sequences derived from same dataset

* as unaligned

* @return

static boolean alignAsSameSequences(AlignmentI unaligned,

2623

AlignmentI aligned)

2624

{

2625

if (aligned.getDataset() == null || unaligned.getDataset() == null)

2626

{

2627

return false; // should only pass alignments with datasets here

2628

}

2629

2630

// map from dataset sequence to alignment sequence(s)

2631

Map<SequenceI, List<SequenceI>> alignedDatasets = new HashMap<>();

2632

for (SequenceI seq : aligned.getSequences())

2633

{

2634

SequenceI ds = seq.getDatasetSequence();

2635

if (alignedDatasets.get(ds) == null)

2636

{

2637

alignedDatasets.put(ds, new ArrayList<SequenceI>());

2638

}

2639

alignedDatasets.get(ds).add(seq);

}

* first pass - check whether all sequences to be aligned share a

2644

* dataset sequence with an aligned sequence; also note the leftmost

2645

* ungapped column from which to copy

2646

2647

int leftmost = Integer.MAX_VALUE;

2648

for (SequenceI seq : unaligned.getSequences())

2649

{

2650

final SequenceI ds = seq.getDatasetSequence();

2651

if (!alignedDatasets.containsKey(ds))

{

return false;

}

SequenceI alignedSeq = alignedDatasets.get(ds).get(0);

2656

int startCol = alignedSeq.findIndex(seq.getStart()); // 1..

2657

leftmost = Math.min(leftmost, startCol);

}

* second pass - copy aligned sequences;

2662

* heuristic rule: pair off sequences in order for the case where

2663

* more than one shares the same dataset sequence

2664

2665

final char gapCharacter = aligned.getGapCharacter();

2666

for (SequenceI seq : unaligned.getSequences())

2667

{

2668

List<SequenceI> alignedSequences = alignedDatasets

2669

.get(seq.getDatasetSequence());

2670

if (alignedSequences.isEmpty())

2671

{

2672

2673

* defensive check - shouldn't happen! (JAL-3536)

continue;

}

SequenceI alignedSeq = alignedSequences.get(0);

2678

2679

2680

* gap fill for leading (5') UTR if any

2681

2682

// TODO this copies intron columns - wrong!

2683

int startCol = alignedSeq.findIndex(seq.getStart()); // 1..

2684

int endCol = alignedSeq.findIndex(seq.getEnd());

2685

char[] seqchars = new char[endCol - leftmost + 1];

2686

Arrays.fill(seqchars, gapCharacter);

2687

char[] toCopy = alignedSeq.getSequence(startCol - 1, endCol);

2688

System.arraycopy(toCopy, 0, seqchars, startCol - leftmost,

2689

toCopy.length);

2690

seq.setSequence(String.valueOf(seqchars));

2691

if (alignedSequences.size() > 0)

2692

{

2693

// pop off aligned sequences (except the last one)

2694

alignedSequences.remove(0);

}

}

* finally remove gapped columns (e.g. introns)

2700

2701

new RemoveGapColCommand("", unaligned.getSequencesArray(), 0,

2702

unaligned.getWidth() - 1, unaligned);

return true;

}

/**

* Returns a map whose key is alignment column number (base 1), and whose

2709

* values are a map of sequence characters in that column.

* @param unaligned

* @param aligned

* @param unmapped

* @return

static SortedMap<Integer, Map<SequenceI, Character>> buildMappedColumnsMap(

2717

AlignmentI unaligned, AlignmentI aligned,

2718

List<SequenceI> unmapped)

2719

{

2720

2721

* Map will hold, for each aligned column position, a map of

2722

* {unalignedSequence, characterPerSequence} at that position.

2723

* TreeMap keeps the entries in ascending column order.

2724

2725

SortedMap<Integer, Map<SequenceI, Character>> map = new TreeMap<>();

2726

2727

2728

* record any sequences that have no mapping so can't be realigned

2729

2730

unmapped.addAll(unaligned.getSequences());

2731

2732

List<AlignedCodonFrame> mappings = aligned.getCodonFrames();

2733

2734

for (SequenceI seq : unaligned.getSequences())

2735

{

2736

for (AlignedCodonFrame mapping : mappings)

2737

{

2738

510

SequenceI fromSeq = mapping.findAlignedSequence(seq, aligned);

2739

510

if (fromSeq != null)

2740

{

2741

Mapping seqMap = mapping.getMappingBetween(fromSeq, seq);

2742

if (addMappedPositions(seq, fromSeq, seqMap, map))

2743

{

2744

unmapped.remove(seq);

}

}

}

}

return map;

}

/**

* Helper method that adds to a map the mapped column positions of a sequence.

2754

* <br>

2755

* For example if aaTT-Tg-gAAA is mapped to TTTAAA then the map should record

2756

* that columns 3,4,6,10,11,12 map to characters T,T,T,A,A,A of the mapped to

* sequence.

* @param seq

* the sequence whose column positions we are recording

2761

* @param fromSeq

2762

* a sequence that is mapped to the first sequence

2763

* @param seqMap

2764

* the mapping from 'fromSeq' to 'seq'

2765

* @param map

2766

* a map to add the column positions (in fromSeq) of the mapped

* positions of seq

* @return

static boolean addMappedPositions(SequenceI seq, SequenceI fromSeq,

2771

Mapping seqMap, Map<Integer, Map<SequenceI, Character>> map)

{

if (seqMap == null)

{

return false;

}

* invert mapping if it is from unaligned to aligned sequence

2780

2781

if (seqMap.getTo() == fromSeq.getDatasetSequence())

2782

{

2783

seqMap = new Mapping(seq.getDatasetSequence(),

2784

seqMap.getMap().getInverse());

2785

}

2786

2787

int toStart = seq.getStart();

2788

2789

2790

* traverse [start, end, start, end...] ranges in fromSeq

2791

2792

for (int[] fromRange : seqMap.getMap().getFromRanges())

2793

{

2794

for (int i = 0; i < fromRange.length - 1; i += 2)

2795

{

2796

boolean forward = fromRange[i + 1] >= fromRange[i];

2797

2798

2799

* find the range mapped to (sequence positions base 1)

2800

2801

int[] range = seqMap.locateMappedRange(fromRange[i],

fromRange[i + 1]);

if (range == null)

{

jalview.bin.Console.errPrintln("Error in mapping " + seqMap

2806

+ " from " + fromSeq.getName());

2807

return false;

2808

}

2809

int fromCol = fromSeq.findIndex(fromRange[i]);

2810

int mappedCharPos = range[0];

2811

2812

2813

* walk over the 'from' aligned sequence in forward or reverse

2814

* direction; when a non-gap is found, record the column position

2815

* of the next character of the mapped-to sequence; stop when all

2816

* the characters of the range have been counted

2817

2818

2794274

while (mappedCharPos <= range[1] && fromCol <= fromSeq.getLength()

2819

&& fromCol >= 0)

2820

{

2821

2794243

if (!Comparison.isGap(fromSeq.getCharAt(fromCol - 1)))

2822

{

2823

2824

* mapped from sequence has a character in this column

2825

* record the column position for the mapped to character

2826

2827

31998

Map<SequenceI, Character> seqsMap = map.get(fromCol);

2828

31998

if (seqsMap == null)

2829

{

2830

5398

seqsMap = new HashMap<>();

2831

5398

map.put(fromCol, seqsMap);

2832

}

2833

31998

seqsMap.put(seq, seq.getCharAt(mappedCharPos - toStart));

2834

31998

mappedCharPos++;

2835

}

2836

2794243

fromCol += (forward ? 1 : -1);

}

}

}

return true;

}

// strictly temporary hack until proper criteria for aligning protein to cds

2844

// are in place; this is so Ensembl -> fetch xrefs Uniprot aligns the Uniprot

2845

public static boolean looksLikeEnsembl(AlignmentI alignment)

2846

{

2847

for (SequenceI seq : alignment.getSequences())

2848

{

2849

String name = seq.getName();

2850

if (!name.startsWith("ENSG") && !name.startsWith("ENST"))

{

return false;

}

}

return true;

}

public static List<String> getSecondaryStructureSources(

2859

AlignmentAnnotation[] annotations)

2860

{

2861

2862

List<String> ssSources = new ArrayList<>();

2863

Set<String> addedLabels = new HashSet<>(); // to keep track of added labels

2864

2865

for (AlignmentAnnotation annotation : annotations)

2866

{

2867

String label = annotation.label;

2868

if (Constants.SECONDARY_STRUCTURE_LABELS.containsKey(label)

2869

&& !addedLabels.contains(label))

2870

{

2871

ssSources.add(Constants.SECONDARY_STRUCTURE_LABELS.get(label));

2872

addedLabels.add(label); // Add the label to the set

}

}

return ssSources;

}

public static boolean isSecondaryStructurePresent(

2880

AlignmentAnnotation[] annotations)

2881

{

2882

boolean ssPresent = false;

2883

2884

for (AlignmentAnnotation aa : annotations)

{

if (ssPresent)

{

break;

}

if (Constants.SECONDARY_STRUCTURE_LABELS.containsKey(aa.label))

{

ssPresent = true;

break;

}

}

return ssPresent;

}

public static Color getSecondaryStructureAnnotationColour(char symbol)

2903

{

2904

2905

if (symbol == Constants.COIL)

{

return Color.gray;

}

if (symbol == Constants.SHEET)

{

return Color.green;

}

if (symbol == Constants.HELIX)

{

return Color.red;

}

return Color.gray;

}

public static char findSSAnnotationForGivenSeqposition(

2922

AlignmentAnnotation aa, int seqPosition)

{

char ss = '*';

if (aa != null)

{

if (aa.getAnnotationForPosition(seqPosition) != null)

2929

{

2930

Annotation a = aa.getAnnotationForPosition(seqPosition);

2931

ss = a.secondaryStructure;

2932

2933

// There is no representation for coil and it can be either ' ' or null.

2934

if (ss == ' ' || ss == '-')

{

ss = Constants.COIL;

}

}

else

{

ss = Constants.COIL;

}

}

return ss;

}

public static List<String> extractSSSourceInAlignmentAnnotation(

2949

AlignmentAnnotation[] annotations)

2950

{

2951

2952

List<String> ssSources = new ArrayList<>();

2953

Set<String> addedSources = new HashSet<>(); // to keep track of added

2954

// sources

2955

2956

for (AlignmentAnnotation aa : annotations)

2957

{

2958

2959

String ssSource = extractSSSourceFromAnnotationDescription(aa);

2960

2961

if (ssSource != null && !addedSources.contains(ssSource))

2962

{

2963

ssSources.add(ssSource);

2964

addedSources.add(ssSource);

}

}

Collections.sort(ssSources);

return ssSources;

}

public static String extractSSSourceFromAnnotationDescription(

2975

AlignmentAnnotation aa)

2976

{

2977

2978

for (String label : Constants.SECONDARY_STRUCTURE_LABELS.keySet())

2979

{

2980

2981

if (label.equals(aa.label))

{

// For JPred

if (aa.label.equals(Constants.SS_ANNOTATION_FROM_JPRED_LABEL))

2986

{

2987

2988

return (Constants.SECONDARY_STRUCTURE_LABELS.get(aa.label));

}

// For input with secondary structure

2993

if (aa.label.equals(Constants.SS_ANNOTATION_LABEL)

2994

&& aa.description.equals(Constants.SS_ANNOTATION_LABEL))

2995

{

2996

2997

return (Constants.SECONDARY_STRUCTURE_LABELS.get(aa.label));

}

// For other sources

if (aa.sequenceRef == null)

{

return null;

}

else if (aa.sequenceRef.getDatasetSequence() == null)

{

return null;

}

Vector<PDBEntry> pdbEntries = aa.sequenceRef.getDatasetSequence()

3011

.getAllPDBEntries();

3012

3013

for (PDBEntry entry : pdbEntries)

3014

{

3015

3016

String entryProvider = entry.getProvider();

3017

if (entryProvider == null)

3018

{

3019

entryProvider = "PDB";

3020

}

3021

3022

// Trim the string from first occurrence of colon

3023

String entryID = entry.getId();

3024

int index = entryID.indexOf(':');

3025

3026

// Check if colon exists

if (index != -1)

{

// Trim the string from first occurrence of colon

3031

entryID = entryID.substring(0, index);

}

if (entryProvider == "PDB" && aa.description.toLowerCase()

3036

.contains("secondary structure for "

3037

+ entryID.toLowerCase()))

3038

{

3039

3040

return entryProvider;

}

else if (entryProvider != "PDB" && aa.description.toLowerCase()

3045

.contains(entryID.toLowerCase()))

3046

{

3047

3048

return entryProvider;

}

}

}

}

return null;

}

// to do set priority for labels

3062

241392

public static AlignmentAnnotation getDisplayedAlignmentAnnotation(

SequenceI seq)

{

241392

for (String ssLabel : Constants.SECONDARY_STRUCTURE_LABELS.keySet())

3067

{

3068

3069

482784

AlignmentAnnotation[] aa = seq.getAnnotation(ssLabel);

3070

482784

if (aa != null)

3071

{

3072

3073

for (AlignmentAnnotation annot : aa)

3074

{

3075

if (annot.isForDisplay())

{

return annot;

}

}

}

}

241392

return null;

}

}

Coverage Report

File AlignmentUtils.java

Coverage histogram

Code metrics

Classes

Class AlignmentUtils

Class AlignmentUtils.DnaVariant

Contributing tests

Contributing tests

Source view