File AlignmentUtils.java

Branches:

388

Statements:

801

Methods:

Classes:

LOC:

2,777

NCLOC:

1,660

Total complexity:

290

Complexity density:

0.36

Statements/Method:

18.2

Methods/Class:

Average method complexity:

6.59

Classes

Class	Line #	Total Statements	Complexity	TOTAL Coverage	Actions
AlignmentUtils	70	795	284	0.8408531584.1%
AlignmentUtils.DnaVariant	86	6	6	0.00%

Class AlignmentUtils

Class AlignmentUtils	Line # 70	Total Statements 795	Complexity 284	TOTAL Coverage 0.8408531584.1%
expandContext(AlignmentI,int) : AlignmentI expandContext(AlignmentI,int) : AlignmentI	127127	46.046	10.010	0.983871 0.98387198.4%
getSequenceIndex(AlignmentI,SequenceI) : int getSequenceIndex(AlignmentI,SequenceI) : int	233233	8.08	2.02	1.0 1.0100%
getSequencesByName(AlignmentI) : Map<String, List<SequenceI>> getSequencesByName(AlignmentI) : Map<String, List<SequenceI>>	256256	10.010	3.03	0.9285714 0.928571492.9%
mapProteinAlignmentToCdna(AlignmentI,AlignmentI) : boolean mapProteinAlignmentToCdna(AlignmentI,AlignmentI) : boolean	287287	7.07	3.03	0.7777778 0.777777877.8%
mapProteinToCdna(AlignmentI,AlignmentI,Set<SequenceI>,Set<SequenceI>,boolean) : boolean mapProteinToCdna(AlignmentI,AlignmentI,Set<SequenceI>,Set<SequenceI>,boolean) : boolean	329329	22.022	9.09	0.9375 0.937593.8%
mappingExists(List<AlignedCodonFrame>,SequenceI,SequenceI) : boolean mappingExists(List<AlignedCodonFrame>,SequenceI,SequenceI) : boolean	396396	5.05	3.03	0.6666667 0.666666766.7%
mapCdnaToProtein(SequenceI,SequenceI) : MapList mapCdnaToProtein(SequenceI,SequenceI) : MapList	429429	28.028	12.012	0.95238096 0.9523809695.2%
translatesAs(char[],int,char[]) : boolean translatesAs(char[],int,char[]) : boolean	517517	21.021	14.014	1.0 1.0100%
alignSequenceAs(SequenceI,AlignmentI,String,boolean,boolean) : boolean alignSequenceAs(SequenceI,AlignmentI,String,boolean,boolean) : boolean	593593	14.014	5.05	0.9 0.990%
alignSequenceAs(SequenceI,SequenceI,AlignedCodonFrame,String,char,boolean,boolean) : void alignSequenceAs(SequenceI,SequenceI,AlignedCodonFrame,String,char,boolean,boolean) : void	649649	60.060	20.020	1.0 1.0100%
calculateGapsToInsert(boolean,boolean,int,boolean,int,int,boolean) : int calculateGapsToInsert(boolean,boolean,int,boolean,int,int,boolean) : int	813813	15.015	10.010	1.0 1.0100%
alignProteinAsDna(AlignmentI,AlignmentI) : int alignProteinAsDna(AlignmentI,AlignmentI) : int	877877	6.06	3.03	0.625 0.62562.5%
alignCdsAsProtein(AlignmentI,AlignmentI) : int alignCdsAsProtein(AlignmentI,AlignmentI) : int	902902	18.018	5.05	0.8333333 0.833333383.3%
alignCdsSequenceAsProtein(SequenceI,AlignmentI,List<AlignedCodonFrame>,char) : boolean alignCdsSequenceAsProtein(SequenceI,AlignmentI,List<AlignedCodonFrame>,char) : boolean	947947	48.048	16.016	0.67105263 0.6710526367.1%
buildCodonColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : Map<AlignedCodon, Map<SequenceI, AlignedCodon>> buildCodonColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : Map<AlignedCodon, Map<SequenceI, AlignedCodon>>	10781078	13.013	2.02	1.0 1.0100%
addUnmappedPeptideStarts(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,int) : void addUnmappedPeptideStarts(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,int) : void	11341134	23.023	6.06	0.93939394 0.9393939493.9%
alignProteinAs(AlignmentI,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,List<SequenceI>) : int alignProteinAs(AlignmentI,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,List<SequenceI>) : int	12171217	17.017	2.02	1.0 1.0100%
addCodonPositions(SequenceI,SequenceI,char,Mapping,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>) : void addCodonPositions(SequenceI,SequenceI,char,Mapping,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>) : void	12821282	5.05	4.04	1.0 1.0100%
addCodonToMap(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,AlignedCodon,SequenceI) : void addCodonToMap(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,AlignedCodon,SequenceI) : void	13151315	5.05	2.02	1.0 1.0100%
isMappable(AlignmentI,AlignmentI) : boolean isMappable(AlignmentI,AlignmentI) : boolean	13451345	12.012	7.07	0.90909094 0.9090909490.9%
isMappable(SequenceI,SequenceI,List<AlignedCodonFrame>) : boolean isMappable(SequenceI,SequenceI,List<AlignedCodonFrame>) : boolean	13841384	8.08	6.06	0.5 0.550%
findAddableReferenceAnnotations(List<SequenceI>,Map<String, String>,Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI) : void findAddableReferenceAnnotations(List<SequenceI>,Map<String, String>,Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI) : void	14311431	18.018	7.07	0.9 0.990%
addReferenceAnnotations(Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI,SequenceGroup) : void addReferenceAnnotations(Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI,SequenceGroup) : void	14991499	14.014	3.03	0.0 0.00%
showOrHideSequenceAnnotations(AlignmentI,Collection<String>,List<SequenceI>,boolean,boolean) : void showOrHideSequenceAnnotations(AlignmentI,Collection<String>,List<SequenceI>,boolean,boolean) : void	15501550	6.06	7.07	0.9166667 0.916666791.7%
haveCrossRef(SequenceI,SequenceI) : boolean haveCrossRef(SequenceI,SequenceI) : boolean	15781578	1.01	1.01	1.0 1.0100%
hasCrossRef(SequenceI,SequenceI) : boolean hasCrossRef(SequenceI,SequenceI) : boolean	15931593	11.011	6.06	1.0 1.0100%
makeCdsAlignment(SequenceI[],AlignmentI,SequenceI[]) : AlignmentI makeCdsAlignment(SequenceI[],AlignmentI,SequenceI[]) : AlignmentI	16341634	66.066	16.016	0.8152174 0.815217481.5%
transferGeneLoci(SequenceI,MapList,SequenceI) : void transferGeneLoci(SequenceI,MapList,SequenceI) : void	18601860	8.08	4.04	0.9285714 0.928571492.9%
findCdsForProtein(List<AlignedCodonFrame>,SequenceI,List<AlignedCodonFrame>,Mapping) : SequenceI findCdsForProtein(List<AlignedCodonFrame>,SequenceI,List<AlignedCodonFrame>,Mapping) : SequenceI	18981898	19.019	11.011	0.4516129 0.451612945.2%
makeCdsSequence(SequenceI,Mapping,AlignmentI) : SequenceI makeCdsSequence(SequenceI,Mapping,AlignmentI) : SequenceI	19851985	38.038	12.012	0.67241377 0.6724137767.2%
propagateDBRefsToCDS(SequenceI,SequenceI,SequenceI,Mapping) : List<DBRefEntry> propagateDBRefsToCDS(SequenceI,SequenceI,SequenceI,Mapping) : List<DBRefEntry>	20922092	28.028	11.011	0.8863636 0.886363688.6%
transferFeatures(SequenceI,SequenceI,MapList,String,String) : int transferFeatures(SequenceI,SequenceI,MapList,String,String) : int	21802180	33.033	12.012	1.0 1.0100%
mapCdsToProtein(SequenceI,SequenceI) : MapList mapCdsToProtein(SequenceI,SequenceI) : MapList	22762276	22.022	5.05	1.0 1.0100%
findCdsPositions(SequenceI) : List<int[]> findCdsPositions(SequenceI) : List<int[]>	23392339	20.020	7.07	0.9285714 0.928571492.9%
makeCopyAlignment(SequenceI[],SequenceI[],AlignmentI) : AlignmentI makeCopyAlignment(SequenceI[],SequenceI[],AlignmentI) : AlignmentI	24072407	21.021	9.09	0.0 0.00%
alignAs(AlignmentI,AlignmentI) : int alignAs(AlignmentI,AlignmentI) : int	24622462	26.026	5.05	0.9411765 0.941176594.1%
alignAsSameSequences(AlignmentI,AlignmentI) : boolean alignAsSameSequences(AlignmentI,AlignmentI) : boolean	25412541	33.033	7.07	0.88372093 0.8837209388.4%
buildMappedColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : SortedMap<Integer, Map<SequenceI, Character>> buildMappedColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : SortedMap<Integer, Map<SequenceI, Character>>	26362636	11.011	3.03	0.8666667 0.866666786.7%
addMappedPositions(SequenceI,SequenceI,Mapping,Map<Integer, Map<SequenceI, Character>>) : boolean addMappedPositions(SequenceI,SequenceI,Mapping,Map<Integer, Map<SequenceI, Character>>) : boolean	26902690	24.024	11.011	0.8 0.880%
looksLikeEnsembl(AlignmentI) : boolean looksLikeEnsembl(AlignmentI) : boolean	27652765	5.05	3.03	0.71428573 0.7142857371.4%

Class AlignmentUtils.DnaVariant

Class AlignmentUtils.DnaVariant	Line # 86	Total Statements 6	Complexity 6	TOTAL Coverage 0.00%
DnaVariant(String) DnaVariant(String)	9292	2.02	1.01	0.0 0.00%
DnaVariant(String,SequenceFeature) DnaVariant(String,SequenceFeature)	9898	2.02	1.01	0.0 0.00%
getSource() : String getSource() : String	104104	1.01	2.02	0.0 0.00%
toString() : String toString() : String	112112	1.01	2.02	0.0 0.00%

Contributing tests

This file is covered by 85 tests.

Contributing tests

Test contribution	Test	Result
0.22465532	jalview.io.CrossRef2xmlTests.openCrossrefsForEnsemblTwicejalview.io.CrossRef2xmlTests.openCrossrefsForEnsemblTwice	1PASS
0.15166262	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignmentjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment	1PASS
0.11922141	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withXrefs	1PASS
0.11922141	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withStartAndStopCodonsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withStartAndStopCodons	1PASS
0.10948905	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_multipleProteinsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_multipleProteins	1PASS
0.10867802	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_filterProductsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_filterProducts	1PASS
0.10381184	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_alternativeTranscriptsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_alternativeTranscripts	1PASS
0.10381184	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_noXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_noXrefs	1PASS
0.08840227	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_prioritiseXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_prioritiseXrefs	1PASS
0.08678021	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withIntronsjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withIntrons	1PASS
0.08353609	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_noIntronsjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_noIntrons	1PASS
0.07866991	jalview.io.FeaturesFileTest.simpleGff3FileClassjalview.io.FeaturesFileTest.simpleGff3FileClass	1PASS
0.07866991	jalview.io.FeaturesFileTest.simpleGff3RelaxedIdMatchingjalview.io.FeaturesFileTest.simpleGff3RelaxedIdMatching	1PASS
0.07866991	jalview.io.FeaturesFileTest.readGff3Filejalview.io.FeaturesFileTest.readGff3File	1PASS
0.07866991	jalview.io.FeaturesFileTest.simpleGff3FileLoaderjalview.io.FeaturesFileTest.simpleGff3FileLoader	1PASS
0.07623682	jalview.datamodel.AlignmentTest.testAlignAs_dnaAsDnajalview.datamodel.AlignmentTest.testAlignAs_dnaAsDna	1PASS
0.07461476	jalview.analysis.AlignmentUtilsTests.testAlignAs_alternateTranscriptsUngappedjalview.analysis.AlignmentUtilsTests.testAlignAs_alternateTranscriptsUngapped	1PASS
0.07542579	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_keepIntronGapsOnlyjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_keepIntronGapsOnly	1PASS
0.07218167	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withUnmappedProteinjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withUnmappedProtein	1PASS
0.07137064	jalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna_incompleteStartCodonjalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna_incompleteStartCodon	1PASS
0.06731549	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withTrailingPeptidejalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withTrailingPeptide	1PASS
0.0648824	jalview.analysis.AlignmentUtilsTests.testAlignProteinAsDnajalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna	1PASS
0.06407137	jalview.datamodel.AlignmentTest.testAlignAs_proteinAsCdnajalview.datamodel.AlignmentTest.testAlignAs_proteinAsCdna	1PASS
0.06163828	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_mappedProteinProteinjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_mappedProteinProtein	1PASS
0.05920519	jalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProteinjalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProtein	1PASS
0.060016222	jalview.analysis.AlignmentUtilsTests.testIsMappablejalview.analysis.AlignmentUtilsTests.testIsMappable	1PASS
0.05758313	jalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProtein_singleSequencejalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProtein_singleSequence	1PASS
0.04703974	jalview.analysis.AlignmentUtilsTests.testExpandContextjalview.analysis.AlignmentUtilsTests.testExpandContext	1PASS
0.04541768	jalview.analysis.AlignmentUtilsTests.testMapCdsToProteinjalview.analysis.AlignmentUtilsTests.testMapCdsToProtein	1PASS
0.03811841	jalview.analysis.AlignmentUtilsTests.testExpandContext_annotationjalview.analysis.AlignmentUtilsTests.testExpandContext_annotation	1PASS
0.03487429	jalview.analysis.AlignmentUtilsTests.testTransferFeaturesjalview.analysis.AlignmentUtilsTests.testTransferFeatures	1PASS
0.03568532	jalview.analysis.AlignmentUtilsTests.testMapCdnaToProtein_forSubsequencejalview.analysis.AlignmentUtilsTests.testMapCdnaToProtein_forSubsequence	1PASS
0.03081914	jalview.analysis.AlignmentUtilsTests.testTranslatesAsjalview.analysis.AlignmentUtilsTests.testTranslatesAs	1PASS
0.03081914	jalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesjalview.analysis.AlignmentUtilsTests.testAlignAsSameSequences	1PASS
0.02919708	jalview.analysis.AlignmentUtilsTests.testTransferFeatures_withOmitjalview.analysis.AlignmentUtilsTests.testTransferFeatures_withOmit	1PASS
0.030008111	jalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesMultipleSubSeqjalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesMultipleSubSeq	1PASS
0.02595296	jalview.analysis.AlignmentUtilsTests.testAddMappedPositions_withStopCodonjalview.analysis.AlignmentUtilsTests.testAddMappedPositions_withStopCodon	1PASS
0.02595296	jalview.analysis.AlignmentUtilsTests.testAddMappedPositionsjalview.analysis.AlignmentUtilsTests.testAddMappedPositions	1PASS
0.024330901	jalview.analysis.AlignmentUtilsTests.testTransferFeatures_withSelectjalview.analysis.AlignmentUtilsTests.testTransferFeatures_withSelect	1PASS
0.01946472	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenujalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu	1PASS
0.01946472	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_notOnAlignmentjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_notOnAlignment	1PASS
0.020275751	jalview.analysis.AlignmentUtilsTests.testFindCdsPositions_fivePrimeIncompletejalview.analysis.AlignmentUtilsTests.testFindCdsPositions_fivePrimeIncomplete	1PASS
0.01540957	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_alreadyAddedjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_alreadyAdded	1PASS
0.0162206	jalview.analysis.AlignmentUtilsTests.testHasCrossRefjalview.analysis.AlignmentUtilsTests.testHasCrossRef	1PASS
0.01540957	jalview.analysis.AlignmentUtilsTests.testFindCdsPositionsjalview.analysis.AlignmentUtilsTests.testFindCdsPositions	1PASS
0.0162206	jalview.analysis.AlignmentUtilsTests.testHaveCrossRefjalview.analysis.AlignmentUtilsTests.testHaveCrossRef	1PASS
0.01378751	jalview.ext.jmol.JmolViewerTest.testAddStrToSingleSeqViewJMoljalview.ext.jmol.JmolViewerTest.testAddStrToSingleSeqViewJMol	1PASS
0.01135442	jalview.analysis.AlignmentUtilsTests.testGetSequencesByNamejalview.analysis.AlignmentUtilsTests.testGetSequencesByName	1PASS
0.01135442	jalview.analysis.AlignmentUtilsTests.testTransferGeneLocijalview.analysis.AlignmentUtilsTests.testTransferGeneLoci	1PASS
0.00892133	jalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcFirstjalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcFirst	1PASS
0.00892133	jalview.ext.jmol.JmolParserTest.testAlignmentLoaderjalview.ext.jmol.JmolParserTest.testAlignmentLoader	1PASS
0.00892133	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noReferenceAnnotationsjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noReferenceAnnotations	1PASS
0.00892133	jalview.io.AnnotatedPDBFileInputTest.testJalviewProjectRelocationAnnotationjalview.io.AnnotatedPDBFileInputTest.testJalviewProjectRelocationAnnotation	1PASS
0.00892133	jalview.analysis.AnnotationSorterTest.testSort_timingPresortedjalview.analysis.AnnotationSorterTest.testSort_timingPresorted	1PASS
0.00973236	jalview.analysis.AlignmentUtilsTests.testShowOrHideSequenceAnnotationsjalview.analysis.AlignmentUtilsTests.testShowOrHideSequenceAnnotations	1PASS
0.00892133	jalview.project.Jalview2xmlTests.testStoreAndRecoverPDBEntryjalview.project.Jalview2xmlTests.testStoreAndRecoverPDBEntry	1PASS
0.00892133	jalview.gui.AnnotationChooserTest.testSelectType_showForAlljalview.gui.AnnotationChooserTest.testSelectType_showForAll	1PASS
0.00892133	jalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcLastjalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcLast	1PASS
0.00892133	jalview.ext.jmol.JmolViewerTest.testSingleSeqViewJMoljalview.ext.jmol.JmolViewerTest.testSingleSeqViewJMol	1PASS
0.00892133	jalview.gui.AnnotationChooserTest.testDeselectType_showForSelectedjalview.gui.AnnotationChooserTest.testDeselectType_showForSelected	1PASS
0.00892133	jalview.gui.AnnotationChooserTest.testIsInActionScope_selectedScopejalview.gui.AnnotationChooserTest.testIsInActionScope_selectedScope	1PASS
0.00892133	jalview.gui.AlignFrameTest.testChangeColour_background_groupsAndThresholdsjalview.gui.AlignFrameTest.testChangeColour_background_groupsAndThresholds	1PASS
0.00892133	jalview.project.Jalview2xmlTests.testColourByAnnotScoresjalview.project.Jalview2xmlTests.testColourByAnnotScores	1PASS
0.00892133	jalview.gui.AnnotationChooserTest.testSelectType_showForSelectedjalview.gui.AnnotationChooserTest.testSelectType_showForSelected	1PASS
0.00892133	jalview.gui.AnnotationChooserTest.testResetOriginalStatejalview.gui.AnnotationChooserTest.testResetOriginalState	1PASS
0.00892133	jalview.analysis.AnnotationSorterTest.testSort_timingUnsortedjalview.analysis.AnnotationSorterTest.testSort_timingUnsorted	1PASS
0.00892133	jalview.analysis.AnnotationSorterTest.testSort_timingSemisortedjalview.analysis.AnnotationSorterTest.testSort_timingSemisorted	1PASS
0.00892133	jalview.gui.AlignFrameTest.testNewView_colourThresholdsjalview.gui.AlignFrameTest.testNewView_colourThresholds	1PASS
0.00892133	jalview.gui.AnnotationChooserTest.testSelectType_hideForSelectedjalview.gui.AnnotationChooserTest.testSelectType_hideForSelected	1PASS
0.00892133	jalview.project.Jalview2xmlTests.testRNAStructureRecoveryjalview.project.Jalview2xmlTests.testRNAStructureRecovery	1PASS
0.00892133	jalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcFirstjalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcFirst	1PASS
0.00892133	jalview.gui.AnnotationChooserTest.testDeselectType_showForAlljalview.gui.AnnotationChooserTest.testDeselectType_showForAll	1PASS
0.00892133	jalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcLastjalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcLast	1PASS
0.00892133	jalview.gui.AnnotationChooserTest.testIsInActionScope_unselectedScopejalview.gui.AnnotationChooserTest.testIsInActionScope_unselectedScope	1PASS
0.00892133	jalview.gui.PopupMenuTest.testHideInsertionsjalview.gui.PopupMenuTest.testHideInsertions	1PASS
0.00892133	jalview.gui.AnnotationChooserTest.testDeselectType_hideForAlljalview.gui.AnnotationChooserTest.testDeselectType_hideForAll	1PASS
0.00892133	jalview.gui.AnnotationColumnChooserTest.testResetjalview.gui.AnnotationColumnChooserTest.testReset	1PASS
0.00892133	jalview.gui.AnnotationChooserTest.testSelectType_hideForAlljalview.gui.AnnotationChooserTest.testSelectType_hideForAll	1PASS
0.00892133	jalview.analysis.AnnotationSorterTest.testNoSort_autocalcFirstjalview.analysis.AnnotationSorterTest.testNoSort_autocalcFirst	1PASS
0.00892133	jalview.gui.ColourMenuHelperTest.testAddMenuItems_nucleotidejalview.gui.ColourMenuHelperTest.testAddMenuItems_nucleotide	1PASS
0.00892133	jalview.project.Jalview2xmlTests.testTCoffeeScoresjalview.project.Jalview2xmlTests.testTCoffeeScores	1PASS
0.00892133	jalview.gui.AnnotationChooserTest.testDeselectType_hideForSelectedjalview.gui.AnnotationChooserTest.testDeselectType_hideForSelected	1PASS
0.00892133	jalview.project.Jalview2xmlTests.testStoreAndRecoverColourThresholdsjalview.project.Jalview2xmlTests.testStoreAndRecoverColourThresholds	1PASS
0.00729927	jalview.ext.jmol.JmolParserTest.testFileParserjalview.ext.jmol.JmolParserTest.testFileParser	1PASS
0.00486618	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noSequenceSelectedjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noSequenceSelected	1PASS

Source view

* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)

* Copyright (C) $$Year-Rel$$ The Jalview Authors

* This file is part of Jalview.

* Jalview is free software: you can redistribute it and/or

* modify it under the terms of the GNU General Public License

* as published by the Free Software Foundation, either version 3

* of the License, or (at your option) any later version.

* Jalview is distributed in the hope that it will be useful, but

* WITHOUT ANY WARRANTY; without even the implied warranty

* of MERCHANTABILITY or FITNESS FOR A PARTICULAR

* PURPOSE. See the GNU General Public License for more details.

* You should have received a copy of the GNU General Public License

* along with Jalview. If not, see <http://www.gnu.org/licenses/>.

* The Jalview Authors are detailed in the 'AUTHORS' file.

package jalview.analysis;

import jalview.commands.RemoveGapColCommand;

import jalview.datamodel.AlignedCodon;

import jalview.datamodel.AlignedCodonFrame;

import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;

import jalview.datamodel.Alignment;

import jalview.datamodel.AlignmentAnnotation;

import jalview.datamodel.AlignmentI;

import jalview.datamodel.DBRefEntry;

import jalview.datamodel.GeneLociI;

import jalview.datamodel.IncompleteCodonException;

import jalview.datamodel.Mapping;

import jalview.datamodel.Sequence;

import jalview.datamodel.SequenceFeature;

import jalview.datamodel.SequenceGroup;

import jalview.datamodel.SequenceI;

import jalview.datamodel.features.SequenceFeatures;

import jalview.io.gff.SequenceOntologyI;

import jalview.schemes.ResidueProperties;

import jalview.util.Comparison;

import jalview.util.DBRefUtils;

import jalview.util.IntRangeComparator;

import jalview.util.MapList;

import jalview.util.MappingUtils;

import java.util.ArrayList;

import java.util.Arrays;

import java.util.Collection;

import java.util.Collections;

import java.util.HashMap;

import java.util.HashSet;

import java.util.Iterator;

import java.util.LinkedHashMap;

import java.util.List;

import java.util.Map;

import java.util.Map.Entry;

import java.util.NoSuchElementException;

import java.util.Set;

import java.util.SortedMap;

import java.util.TreeMap;

/**
 * A grab bag of useful alignment manipulation operations. Expect these to be
 * refactored elsewhere at some point.
 *
 * @author jimp
 */

public class AlignmentUtils

{

private static final int CODON_LENGTH = 3;

private static final String SEQUENCE_VARIANT = "sequence_variant:";

/*
 * the 'id' attribute is provided for variant features fetched from
 * Ensembl using its REST service with JSON format
 */

public static final String VARIANT_ID = "id";

/**

* A data model to hold the 'normal' base value at a position, and an optional

* sequence variant feature

static final class DnaVariant

{

final String base;

SequenceFeature variant;

DnaVariant(String nuc)

{

base = nuc;

variant = null;

}

DnaVariant(String nuc, SequenceFeature var)

{

base = nuc;

variant = var;

}

public String getSource()

105

{

106

return variant == null ? null : variant.getFeatureGroup();

}

/**

* toString for aid in the debugger only

111

112

@Override

113

public String toString()

114

{

115

return base + ":" + (variant == null ? "" : variant.getDescription());

}

}

/**

* given an existing alignment, create a new alignment including all, or up to

121

* flankSize additional symbols from each sequence's dataset sequence

* @param core

* @param flankSize

* @return AlignmentI

public static AlignmentI expandContext(AlignmentI core, int flankSize)

128

{

129

List<SequenceI> sq = new ArrayList<>();

130

int maxoffset = 0;

131

for (SequenceI s : core.getSequences())

132

{

133

131

SequenceI newSeq = s.deriveSequence();

134

131

final int newSeqStart = newSeq.getStart() - 1;

135

131

if (newSeqStart > maxoffset

136

&& newSeq.getDatasetSequence().getStart() < s.getStart())

137

{

138

131

maxoffset = newSeqStart;

139

}

140

131

sq.add(newSeq);

}

if (flankSize > -1)

{

maxoffset = Math.min(maxoffset, flankSize);

}

* now add offset left and right to create an expanded alignment

149

150

for (SequenceI s : sq)

151

{

152

131

SequenceI ds = s;

153

262

while (ds.getDatasetSequence() != null)

154

{

155

131

ds = ds.getDatasetSequence();

156

}

157

131

int s_end = s.findPosition(s.getStart() + s.getLength());

158

// find available flanking residues for sequence

159

131

int ustream_ds = s.getStart() - ds.getStart();

160

131

int dstream_ds = ds.getEnd() - s_end;

161

162

// build new flanked sequence

163

164

// compute gap padding to start of flanking sequence

165

131

int offset = maxoffset - ustream_ds;

166

167

// padding is gapChar x ( maxoffset - min(ustream_ds, flank)

168

131

if (flankSize >= 0)

169

{

170

125

if (flankSize < ustream_ds)

171

{

172

// take up to flankSize residues

173

offset = maxoffset - flankSize;

174

ustream_ds = flankSize;

175

}

176

125

if (flankSize <= dstream_ds)

177

{

178

116

dstream_ds = flankSize - 1;

179

}

180

}

181

// TODO use Character.toLowerCase to avoid creating String objects?

182

131

char[] upstream = new String(ds

183

.getSequence(s.getStart() - 1 - ustream_ds, s.getStart() - 1))

184

.toLowerCase().toCharArray();

185

131

char[] downstream = new String(

186

ds.getSequence(s_end - 1, s_end + dstream_ds)).toLowerCase()

187

.toCharArray();

188

131

char[] coreseq = s.getSequence();

189

131

char[] nseq = new char[offset + upstream.length + downstream.length

190

+ coreseq.length];

191

131

char c = core.getGapCharacter();

192

193

131

int p = 0;

194

461

for (; p < offset; p++)

195

{

196

330

nseq[p] = c;

197

}

198

199

131

System.arraycopy(upstream, 0, nseq, p, upstream.length);

200

131

System.arraycopy(coreseq, 0, nseq, p + upstream.length,

201

coreseq.length);

202

131

System.arraycopy(downstream, 0, nseq,

203

p + coreseq.length + upstream.length, downstream.length);

204

131

s.setSequence(new String(nseq));

205

131

s.setStart(s.getStart() - ustream_ds);

206

131

s.setEnd(s_end + downstream.length);

207

}

208

AlignmentI newAl = new jalview.datamodel.Alignment(

209

sq.toArray(new SequenceI[0]));

210

for (SequenceI s : sq)

211

{

212

131

if (s.getAnnotation() != null)

213

{

214

for (AlignmentAnnotation aa : s.getAnnotation())

215

{

216

aa.adjustForAlignment(); // JAL-1712 fix

217

newAl.addAnnotation(aa);

}

}

}

newAl.setDataset(core.getDataset());

return newAl;

}

/**

* Returns the index (zero-based position) of a sequence in an alignment, or

* -1 if not found.

* @param al

* @param seq

* @return

57464

public static int getSequenceIndex(AlignmentI al, SequenceI seq)

234

{

235

57464

int result = -1;

236

57464

int pos = 0;

237

57464

for (SequenceI alSeq : al.getSequences())

238

{

239

126202503

if (alSeq == seq)

240

{

241

57462

result = pos;

242

57462

break;

243

}

244

126145041

pos++;

245

}

246

57463

return result;

}

/**

* Returns a map of lists of sequences in the alignment, keyed by sequence

251

* name. For use in mapping between different alignment views of the same

252

* sequences.

253

254

* @see jalview.datamodel.AlignmentI#getSequencesByName()

255

256

public static Map<String, List<SequenceI>> getSequencesByName(

257

AlignmentI al)

258

{

259

Map<String, List<SequenceI>> theMap = new LinkedHashMap<>();

260

for (SequenceI seq : al.getSequences())

261

{

262

String name = seq.getName();

263

if (name != null)

264

{

265

List<SequenceI> seqs = theMap.get(name);

266

if (seqs == null)

267

{

268

seqs = new ArrayList<>();

269

theMap.put(name, seqs);

}

seqs.add(seq);

}

}

return theMap;

}

/**

* Build mapping of protein to cDNA alignment. Mappings are made between

279

* sequences where the cDNA translates to the protein sequence. Any new

280

* mappings are added to the protein alignment. Returns true if any mappings

281

* either already exist or were added, else false.

282

283

* @param proteinAlignment

284

* @param cdnaAlignment

285

* @return

286

287

public static boolean mapProteinAlignmentToCdna(

288

final AlignmentI proteinAlignment, final AlignmentI cdnaAlignment)

289

{

290

if (proteinAlignment == null || cdnaAlignment == null)

{

return false;

}

Set<SequenceI> mappedDna = new HashSet<>();

296

Set<SequenceI> mappedProtein = new HashSet<>();

297

298

299

* First pass - map sequences where cross-references exist. This include

300

* 1-to-many mappings to support, for example, variant cDNA.

301

302

boolean mappingPerformed = mapProteinToCdna(proteinAlignment,

303

cdnaAlignment, mappedDna, mappedProtein, true);

304

305

306

* Second pass - map sequences where no cross-references exist. This only

307

* does 1-to-1 mappings and assumes corresponding sequences are in the same

308

* order in the alignments.

309

310

mappingPerformed |= mapProteinToCdna(proteinAlignment, cdnaAlignment,

311

mappedDna, mappedProtein, false);

312

return mappingPerformed;

}

/**

* Make mappings between compatible sequences (where the cDNA translation

317

* matches the protein).

318

319

* @param proteinAlignment

320

* @param cdnaAlignment

321

* @param mappedDna

322

* a set of mapped DNA sequences (to add to)

323

* @param mappedProtein

324

* a set of mapped Protein sequences (to add to)

325

* @param xrefsOnly

326

* if true, only map sequences where xrefs exist

327

* @return

328

329

protected static boolean mapProteinToCdna(

330

final AlignmentI proteinAlignment, final AlignmentI cdnaAlignment,

331

Set<SequenceI> mappedDna, Set<SequenceI> mappedProtein,

332

boolean xrefsOnly)

333

{

334

boolean mappingExistsOrAdded = false;

335

List<SequenceI> thisSeqs = proteinAlignment.getSequences();

336

for (SequenceI aaSeq : thisSeqs)

337

{

338

boolean proteinMapped = false;

339

AlignedCodonFrame acf = new AlignedCodonFrame();

340

341

for (SequenceI cdnaSeq : cdnaAlignment.getSequences())

342

{

343

344

* Always try to map if sequences have xref to each other; this supports

345

* variant cDNA or alternative splicing for a protein sequence.

346

347

* If no xrefs, try to map progressively, assuming that alignments have

348

* mappable sequences in corresponding order. These are not

349

* many-to-many, as that would risk mixing species with similar cDNA

350

* sequences.

351

352

if (xrefsOnly && !AlignmentUtils.haveCrossRef(aaSeq, cdnaSeq))

{

continue;

}

* Don't map non-xrefd sequences more than once each. This heuristic

359

* allows us to pair up similar sequences in ordered alignments.

360

361

if (!xrefsOnly && (mappedProtein.contains(aaSeq)

362

|| mappedDna.contains(cdnaSeq)))

{

continue;

}

if (mappingExists(proteinAlignment.getCodonFrames(),

367

aaSeq.getDatasetSequence(), cdnaSeq.getDatasetSequence()))

368

{

369

mappingExistsOrAdded = true;

}

else

{

MapList map = mapCdnaToProtein(aaSeq, cdnaSeq);

374

if (map != null)

375

{

376

acf.addMap(cdnaSeq, aaSeq, map);

377

mappingExistsOrAdded = true;

378

proteinMapped = true;

379

mappedDna.add(cdnaSeq);

380

mappedProtein.add(aaSeq);

}

}

}

if (proteinMapped)

{

proteinAlignment.addCodonFrame(acf);

387

}

388

}

389

return mappingExistsOrAdded;

}

/**

* Answers true if the mappings include one between the given (dataset)

394

* sequences.

395

396

protected static boolean mappingExists(List<AlignedCodonFrame> mappings,

397

SequenceI aaSeq, SequenceI cdnaSeq)

398

{

399

if (mappings != null)

400

{

401

for (AlignedCodonFrame acf : mappings)

402

{

403

if (cdnaSeq == acf.getDnaForAaSeq(aaSeq))

{

return true;

}

}

}

return false;

}

/**

* Builds a mapping (if possible) of a cDNA to a protein sequence.

414

* <ul>

415

* <li>first checks if the cdna translates exactly to the protein

416

* sequence</li>

417

* <li>else checks for translation after removing a STOP codon</li>

418

* <li>else checks for translation after removing a START codon</li>

419

* <li>if that fails, inspect CDS features on the cDNA sequence</li>

420

* </ul>

421

* Returns null if no mapping is determined.

422

423

* @param proteinSeq

424

* the aligned protein sequence

425

* @param cdnaSeq

426

* the aligned cdna sequence

427

* @return

428

429

public static MapList mapCdnaToProtein(SequenceI proteinSeq,

SequenceI cdnaSeq)

{

* Here we handle either dataset sequence set (desktop) or absent (applet).

434

* Use only the char[] form of the sequence to avoid creating possibly large

435

* String objects.

436

437

final SequenceI proteinDataset = proteinSeq.getDatasetSequence();

438

char[] aaSeqChars = proteinDataset != null

439

? proteinDataset.getSequence()

440

: proteinSeq.getSequence();

441

final SequenceI cdnaDataset = cdnaSeq.getDatasetSequence();

442

char[] cdnaSeqChars = cdnaDataset != null ? cdnaDataset.getSequence()

443

: cdnaSeq.getSequence();

444

if (aaSeqChars == null || cdnaSeqChars == null)

{

return null;

}

* cdnaStart/End, proteinStartEnd are base 1 (for dataset sequence mapping)

451

452

final int mappedLength = CODON_LENGTH * aaSeqChars.length;

453

int cdnaLength = cdnaSeqChars.length;

454

int cdnaStart = cdnaSeq.getStart();

455

int cdnaEnd = cdnaSeq.getEnd();

456

final int proteinStart = proteinSeq.getStart();

457

final int proteinEnd = proteinSeq.getEnd();

458

459

460

* If lengths don't match, try ignoring stop codon (if present)

461

462

if (cdnaLength != mappedLength && cdnaLength > 2)

463

{

464

String lastCodon = String.valueOf(cdnaSeqChars,

465

cdnaLength - CODON_LENGTH, CODON_LENGTH).toUpperCase();

466

for (String stop : ResidueProperties.STOP_CODONS)

467

{

468

if (lastCodon.equals(stop))

469

{

470

cdnaEnd -= CODON_LENGTH;

471

cdnaLength -= CODON_LENGTH;

break;

}

}

}

* If lengths still don't match, try ignoring start codon.

479

480

int startOffset = 0;

481

if (cdnaLength != mappedLength && cdnaLength > 2

482

&& String.valueOf(cdnaSeqChars, 0, CODON_LENGTH).toUpperCase()

483

.equals(ResidueProperties.START))

484

{

485

startOffset += CODON_LENGTH;

486

cdnaStart += CODON_LENGTH;

487

cdnaLength -= CODON_LENGTH;

488

}

489

490

if (translatesAs(cdnaSeqChars, startOffset, aaSeqChars))

491

{

492

493

* protein is translation of dna (+/- start/stop codons)

494

495

MapList map = new MapList(new int[] { cdnaStart, cdnaEnd },

496

new int[]

497

{ proteinStart, proteinEnd }, CODON_LENGTH, 1);

return map;

}

* translation failed - try mapping CDS annotated regions of dna

503

504

return mapCdsToProtein(cdnaSeq, proteinSeq);

}

/**

* Test whether the given cdna sequence, starting at the given offset,

509

* translates to the given amino acid sequence, using the standard translation

510

* table. Designed to fail fast i.e. as soon as a mismatch position is found.

511

512

* @param cdnaSeqChars

* @param cdnaStart

* @param aaSeqChars

* @return

protected static boolean translatesAs(char[] cdnaSeqChars, int cdnaStart,

518

char[] aaSeqChars)

519

{

520

if (cdnaSeqChars == null || aaSeqChars == null)

{

return false;

}

int aaPos = 0;

int dnaPos = cdnaStart;

527

151

for (; dnaPos < cdnaSeqChars.length - 2

528

&& aaPos < aaSeqChars.length; dnaPos += CODON_LENGTH, aaPos++)

529

{

530

120

String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);

531

120

final String translated = ResidueProperties.codonTranslate(codon);

532

533

534

* allow * in protein to match untranslatable in dna

535

536

120

final char aaRes = aaSeqChars[aaPos];

537

120

if ((translated == null || ResidueProperties.STOP.equals(translated))

&& aaRes == '*')

{

continue;

}

116

if (translated == null || !(aaRes == translated.charAt(0)))

543

{

544

// debug

545

// System.out.println(("Mismatch at " + i + "/" + aaResidue + ": "

546

// + codon + "(" + translated + ") != " + aaRes));

return false;

}

}

* check we matched all of the protein sequence

553

554

if (aaPos != aaSeqChars.length)

{

return false;

}

* check we matched all of the dna except

561

* for optional trailing STOP codon

562

563

if (dnaPos == cdnaSeqChars.length)

{

return true;

}

if (dnaPos == cdnaSeqChars.length - CODON_LENGTH)

568

{

569

String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);

570

if (ResidueProperties.STOP

571

.equals(ResidueProperties.codonTranslate(codon)))

{

return true;

}

}

return false;

}

/**

* Align sequence 'seq' to match the alignment of a mapped sequence. Note this

581

* currently assumes that we are aligning cDNA to match protein.

582

583

* @param seq

584

* the sequence to be realigned

585

* @param al

586

* the alignment whose sequence alignment is to be 'copied'

587

* @param gap

588

* character string represent a gap in the realigned sequence

589

* @param preserveUnmappedGaps

590

* @param preserveMappedGaps

591

* @return true if the sequence was realigned, false if it could not be

592

593

public static boolean alignSequenceAs(SequenceI seq, AlignmentI al,

594

String gap, boolean preserveMappedGaps,

595

boolean preserveUnmappedGaps)

596

{

597

598

* Get any mappings from the source alignment to the target (dataset)

599

* sequence.

600

601

// TODO there may be one AlignedCodonFrame per dataset sequence, or one with

602

// all mappings. Would it help to constrain this?

603

List<AlignedCodonFrame> mappings = al.getCodonFrame(seq);

604

if (mappings == null || mappings.isEmpty())

{

return false;

}

* Locate the aligned source sequence whose dataset sequence is mapped. We

611

* just take the first match here (as we can't align like more than one

612

* sequence).

613

614

SequenceI alignFrom = null;

615

AlignedCodonFrame mapping = null;

616

for (AlignedCodonFrame mp : mappings)

617

{

618

alignFrom = mp.findAlignedSequence(seq, al);

619

if (alignFrom != null)

{

mapping = mp;

break;

}

}

if (alignFrom == null)

{

return false;

}

alignSequenceAs(seq, alignFrom, mapping, gap, al.getGapCharacter(),

631

preserveMappedGaps, preserveUnmappedGaps);

return true;

}

/**

* Align sequence 'alignTo' the same way as 'alignFrom', using the mapping to

637

* match residues and codons. Flags control whether existing gaps in unmapped

638

* (intron) and mapped (exon) regions are preserved or not. Gaps between

639

* intron and exon are only retained if both flags are set.

* @param alignTo

* @param alignFrom

* @param mapping

* @param myGap

* @param sourceGap

* @param preserveUnmappedGaps

647

* @param preserveMappedGaps

648

649

public static void alignSequenceAs(SequenceI alignTo, SequenceI alignFrom,

650

AlignedCodonFrame mapping, String myGap, char sourceGap,

651

boolean preserveMappedGaps, boolean preserveUnmappedGaps)

652

{

653

// TODO generalise to work for Protein-Protein, dna-dna, dna-protein

654

655

// aligned and dataset sequence positions, all base zero

int thisSeqPos = 0;

int sourceDsPos = 0;

int basesWritten = 0;

660

char myGapChar = myGap.charAt(0);

661

int ratio = myGap.length();

662

663

int fromOffset = alignFrom.getStart() - 1;

664

int toOffset = alignTo.getStart() - 1;

665

int sourceGapMappedLength = 0;

666

boolean inExon = false;

667

final int toLength = alignTo.getLength();

668

final int fromLength = alignFrom.getLength();

669

StringBuilder thisAligned = new StringBuilder(2 * toLength);

670

671

672

* Traverse the 'model' aligned sequence

673

674

205

for (int i = 0; i < fromLength; i++)

675

{

676

186

char sourceChar = alignFrom.getCharAt(i);

677

186

if (sourceChar == sourceGap)

678

{

679

sourceGapMappedLength += ratio;

continue;

}

* Found a non-gap character. Locate its mapped region if any.

685

686

142

sourceDsPos++;

687

// Note mapping positions are base 1, our sequence positions base 0

688

142

int[] mappedPos = mapping.getMappedRegion(alignTo, alignFrom,

689

sourceDsPos + fromOffset);

690

142

if (mappedPos == null)

691

{

692

693

* unmapped position; treat like a gap

694

695

sourceGapMappedLength += ratio;

696

// System.err.println("Can't align: no codon mapping to residue "

697

// + sourceDsPos + "(" + sourceChar + ")");

// return;

continue;

}

int mappedCodonStart = mappedPos[0]; // position (1...) of codon start

703

int mappedCodonEnd = mappedPos[mappedPos.length - 1]; // codon end pos

704

StringBuilder trailingCopiedGap = new StringBuilder();

705

706

707

* Copy dna sequence up to and including this codon. Optionally, include

708

* gaps before the codon starts (in introns) and/or after the codon starts

709

* (in exons).

710

711

* Note this only works for 'linear' splicing, not reverse or interleaved.

712

* But then 'align dna as protein' doesn't make much sense otherwise.

713

714

int intronLength = 0;

715

294

while (basesWritten + toOffset < mappedCodonEnd

716

&& thisSeqPos < toLength)

717

{

718

246

final char c = alignTo.getCharAt(thisSeqPos++);

719

246

if (c != myGapChar)

720

{

721

146

basesWritten++;

722

146

int sourcePosition = basesWritten + toOffset;

723

146

if (sourcePosition < mappedCodonStart)

724

{

725

726

* Found an unmapped (intron) base. First add in any preceding gaps

727

* (if wanted).

728

729

if (preserveUnmappedGaps && trailingCopiedGap.length() > 0)

730

{

731

thisAligned.append(trailingCopiedGap.toString());

732

intronLength += trailingCopiedGap.length();

733

trailingCopiedGap = new StringBuilder();

}

intronLength++;

inExon = false;

}

else

{

final boolean startOfCodon = sourcePosition == mappedCodonStart;

741

int gapsToAdd = calculateGapsToInsert(preserveMappedGaps,

742

preserveUnmappedGaps, sourceGapMappedLength, inExon,

743

trailingCopiedGap.length(), intronLength, startOfCodon);

744

215

for (int k = 0; k < gapsToAdd; k++)

745

{

746

117

thisAligned.append(myGapChar);

747

}

748

sourceGapMappedLength = 0;

749

inExon = true;

750

}

751

146

thisAligned.append(c);

752

146

trailingCopiedGap = new StringBuilder();

}

else

{

100

if (inExon && preserveMappedGaps)

757

{

758

trailingCopiedGap.append(myGapChar);

759

}

760

else if (!inExon && preserveUnmappedGaps)

761

{

762

trailingCopiedGap.append(myGapChar);

}

}

}

}

* At end of model aligned sequence. Copy any remaining target sequence, optionally

770

* including (intron) gaps.

771

772

129

while (thisSeqPos < toLength)

773

{

774

110

final char c = alignTo.getCharAt(thisSeqPos++);

775

110

if (c != myGapChar || preserveUnmappedGaps)

776

{

777

102

thisAligned.append(c);

778

}

779

110

sourceGapMappedLength--;

}

* finally add gaps to pad for any trailing source gaps or

784

* unmapped characters

785

786

if (preserveUnmappedGaps)

787

{

788

while (sourceGapMappedLength > 0)

789

{

790

thisAligned.append(myGapChar);

791

sourceGapMappedLength--;

}

}

* All done aligning, set the aligned sequence.

797

798

alignTo.setSequence(new String(thisAligned));

}

/**

* Helper method to work out how many gaps to insert when realigning.

803

804

* @param preserveMappedGaps

805

* @param preserveUnmappedGaps

806

* @param sourceGapMappedLength

807

* @param inExon

808

* @param trailingCopiedGap

809

* @param intronLength

810

* @param startOfCodon

811

* @return

812

813

protected static int calculateGapsToInsert(boolean preserveMappedGaps,

814

boolean preserveUnmappedGaps, int sourceGapMappedLength,

815

boolean inExon, int trailingGapLength, int intronLength,

816

final boolean startOfCodon)

{

int gapsToAdd = 0;

if (startOfCodon)

{

* Reached start of codon. Ignore trailing gaps in intron unless we are

823

* preserving gaps in both exon and intron. Ignore them anyway if the

824

* protein alignment introduces a gap at least as large as the intronic

825

* region.

826

827

if (inExon && !preserveMappedGaps)

828

{

829

trailingGapLength = 0;

830

}

831

if (!inExon && !(preserveMappedGaps && preserveUnmappedGaps))

832

{

833

trailingGapLength = 0;

}

if (inExon)

{

gapsToAdd = Math.max(sourceGapMappedLength, trailingGapLength);

}

else

{

if (intronLength + trailingGapLength <= sourceGapMappedLength)

842

{

843

gapsToAdd = sourceGapMappedLength - intronLength;

}

else

{

gapsToAdd = Math.min(

848

intronLength + trailingGapLength - sourceGapMappedLength,

trailingGapLength);

}

}

}

else

{

* second or third base of codon; check for any gaps in dna

857

858

if (!preserveMappedGaps)

859

{

860

trailingGapLength = 0;

861

}

862

gapsToAdd = Math.max(sourceGapMappedLength, trailingGapLength);

}

return gapsToAdd;

}

/**

* Realigns the given protein to match the alignment of the dna, using codon

869

* mappings to translate aligned codon positions to protein residues.

870

871

* @param protein

872

* the alignment whose sequences are realigned by this method

873

* @param dna

874

* the dna alignment whose alignment we are 'copying'

875

* @return the number of sequences that were realigned

876

877

public static int alignProteinAsDna(AlignmentI protein, AlignmentI dna)

878

{

879

if (protein.isNucleotide() || !dna.isNucleotide())

880

{

881

System.err.println("Wrong alignment type in alignProteinAsDna");

882

return 0;

883

}

884

List<SequenceI> unmappedProtein = new ArrayList<>();

885

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons = buildCodonColumnsMap(

886

protein, dna, unmappedProtein);

887

return alignProteinAs(protein, alignedCodons, unmappedProtein);

}

/**

* Realigns the given dna to match the alignment of the protein, using codon

892

* mappings to translate aligned peptide positions to codons.

893

894

* Always produces a padded CDS alignment.

895

896

* @param dna

897

* the alignment whose sequences are realigned by this method

898

* @param protein

899

* the protein alignment whose alignment we are 'copying'

900

* @return the number of sequences that were realigned

901

902

public static int alignCdsAsProtein(AlignmentI dna, AlignmentI protein)

903

{

904

if (protein.isNucleotide() || !dna.isNucleotide())

905

{

906

System.err.println("Wrong alignment type in alignProteinAsDna");

907

return 0;

908

}

909

// todo: implement this

910

List<AlignedCodonFrame> mappings = protein.getCodonFrames();

911

int alignedCount = 0;

912

int width = 0; // alignment width for padding CDS

913

for (SequenceI dnaSeq : dna.getSequences())

914

{

915

if (alignCdsSequenceAsProtein(dnaSeq, protein, mappings,

916

dna.getGapCharacter()))

{

alignedCount++;

}

width = Math.max(dnaSeq.getLength(), width);

}

int oldwidth;

int diff;

for (SequenceI dnaSeq : dna.getSequences())

925

{

926

oldwidth = dnaSeq.getLength();

927

diff = width - oldwidth;

928

if (diff > 0)

929

{

930

dnaSeq.insertCharAt(oldwidth, diff, dna.getGapCharacter());

}

}

return alignedCount;

}

/**

* Helper method to align (if possible) the dna sequence to match the

938

* alignment of a mapped protein sequence. This is currently limited to

939

* handling coding sequence only.

* @param cdsSeq

* @param protein

* @param mappings

* @param gapChar

* @return

static boolean alignCdsSequenceAsProtein(SequenceI cdsSeq,

948

AlignmentI protein, List<AlignedCodonFrame> mappings,

949

char gapChar)

950

{

951

SequenceI cdsDss = cdsSeq.getDatasetSequence();

if (cdsDss == null)

{

System.err

.println("alignCdsSequenceAsProtein needs aligned sequence!");

return false;

}

List<AlignedCodonFrame> dnaMappings = MappingUtils

960

.findMappingsForSequence(cdsSeq, mappings);

961

for (AlignedCodonFrame mapping : dnaMappings)

962

{

963

SequenceI peptide = mapping.findAlignedSequence(cdsSeq, protein);

964

if (peptide != null)

965

{

966

final int peptideLength = peptide.getLength();

967

Mapping map = mapping.getMappingBetween(cdsSeq, peptide);

968

if (map != null)

969

{

970

MapList mapList = map.getMap();

971

if (map.getTo() == peptide.getDatasetSequence())

972

{

973

mapList = mapList.getInverse();

974

}

975

final int cdsLength = cdsDss.getLength();

976

int mappedFromLength = MappingUtils.getLength(mapList

977

.getFromRanges());

978

int mappedToLength = MappingUtils

979

.getLength(mapList.getToRanges());

980

boolean addStopCodon = (cdsLength == mappedFromLength

981

* CODON_LENGTH + CODON_LENGTH)

982

|| (peptide.getDatasetSequence()

983

.getLength() == mappedFromLength - 1);

984

if (cdsLength != mappedToLength && !addStopCodon)

985

{

986

System.err.println(String.format(

987

"Can't align cds as protein (length mismatch %d/%d): %s",

988

cdsLength, mappedToLength, cdsSeq.getName()));

}

* pre-fill the aligned cds sequence with gaps

993

994

char[] alignedCds = new char[peptideLength * CODON_LENGTH

995

+ (addStopCodon ? CODON_LENGTH : 0)];

996

Arrays.fill(alignedCds, gapChar);

997

998

999

* walk over the aligned peptide sequence and insert mapped

1000

* codons for residues in the aligned cds sequence

1001

1002

int copiedBases = 0;

1003

int cdsStart = cdsDss.getStart();

1004

int proteinPos = peptide.getStart() - 1;

1005

int cdsCol = 0;

1006

1007

for (int col = 0; col < peptideLength; col++)

1008

{

1009

char residue = peptide.getCharAt(col);

1010

1011

if (Comparison.isGap(residue))

1012

{

1013

cdsCol += CODON_LENGTH;

}

else

{

proteinPos++;

int[] codon = mapList.locateInTo(proteinPos, proteinPos);

1019

if (codon == null)

1020

{

1021

// e.g. incomplete start codon, X in peptide

1022

cdsCol += CODON_LENGTH;

}

else

{

for (int j = codon[0]; j <= codon[1]; j++)

1027

{

1028

char mappedBase = cdsDss.getCharAt(j - cdsStart);

1029

alignedCds[cdsCol++] = mappedBase;

copiedBases++;

}

}

}

}

* append stop codon if not mapped from protein,

1038

* closing it up to the end of the mapped sequence

1039

1040

if (copiedBases == cdsLength - CODON_LENGTH)

1041

{

1042

for (int i = alignedCds.length - 1; i >= 0; i--)

1043

{

1044

if (!Comparison.isGap(alignedCds[i]))

1045

{

1046

cdsCol = i + 1; // gap just after end of sequence

break;

}

}

for (int i = cdsLength - CODON_LENGTH; i < cdsLength; i++)

1051

{

1052

alignedCds[cdsCol++] = cdsDss.getCharAt(i);

1053

}

1054

}

1055

cdsSeq.setSequence(new String(alignedCds));

return true;

}

}

}

return false;

}

/**

* Builds a map whose key is an aligned codon position (3 alignment column

1065

* numbers base 0), and whose value is a map from protein sequence to each

1066

* protein's peptide residue for that codon. The map generates an ordering of

1067

* the codons, and allows us to read off the peptides at each position in

1068

* order to assemble 'aligned' protein sequences.

1069

1070

* @param protein

1071

* the protein alignment

1072

* @param dna

1073

* the coding dna alignment

1074

* @param unmappedProtein

1075

* any unmapped proteins are added to this list

1076

* @return

1077

1078

protected static Map<AlignedCodon, Map<SequenceI, AlignedCodon>> buildCodonColumnsMap(

1079

AlignmentI protein, AlignmentI dna,

1080

List<SequenceI> unmappedProtein)

1081

{

1082

1083

* maintain a list of any proteins with no mappings - these will be

1084

* rendered 'as is' in the protein alignment as we can't align them

1085

1086

unmappedProtein.addAll(protein.getSequences());

1087

1088

List<AlignedCodonFrame> mappings = protein.getCodonFrames();

1089

1090

1091

* Map will hold, for each aligned codon position e.g. [3, 5, 6], a map of

1092

* {dnaSequence, {proteinSequence, codonProduct}} at that position. The

1093

* comparator keeps the codon positions ordered.

1094

1095

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons = new TreeMap<>(

1096

new CodonComparator());

1097

1098

for (SequenceI dnaSeq : dna.getSequences())

1099

{

1100

for (AlignedCodonFrame mapping : mappings)

1101

{

1102

274

SequenceI prot = mapping.findAlignedSequence(dnaSeq, protein);

1103

274

if (prot != null)

1104

{

1105

Mapping seqMap = mapping.getMappingForSequence(dnaSeq);

1106

addCodonPositions(dnaSeq, prot, protein.getGapCharacter(), seqMap,

1107

alignedCodons);

1108

unmappedProtein.remove(prot);

}

}

}

* Finally add any unmapped peptide start residues (e.g. for incomplete

1115

* codons) as if at the codon position before the second residue

1116

1117

// TODO resolve JAL-2022 so this fudge can be removed

1118

int mappedSequenceCount = protein.getHeight() - unmappedProtein.size();

1119

addUnmappedPeptideStarts(alignedCodons, mappedSequenceCount);

1120

1121

return alignedCodons;

}

/**

* Scans for any protein mapped from position 2 (meaning unmapped start

1126

* position e.g. an incomplete codon), and synthesizes a 'codon' for it at the

1127

* preceding position in the alignment

1128

1129

* @param alignedCodons

1130

* the codon-to-peptide map

1131

* @param mappedSequenceCount

1132

* the number of distinct sequences in the map

1133

1134

protected static void addUnmappedPeptideStarts(

1135

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,

1136

int mappedSequenceCount)

1137

{

1138

// TODO delete this ugly hack once JAL-2022 is resolved

1139

// i.e. we can model startPhase > 0 (incomplete start codon)

1140

1141

List<SequenceI> sequencesChecked = new ArrayList<>();

1142

AlignedCodon lastCodon = null;

1143

Map<SequenceI, AlignedCodon> toAdd = new HashMap<>();

1144

1145

for (Entry<AlignedCodon, Map<SequenceI, AlignedCodon>> entry : alignedCodons

1146

.entrySet())

1147

{

1148

1913

for (Entry<SequenceI, AlignedCodon> sequenceCodon : entry.getValue()

1149

.entrySet())

1150

{

1151

10661

SequenceI seq = sequenceCodon.getKey();

1152

10661

if (sequencesChecked.contains(seq))

1153

{

1154

10631

continue;

1155

}

1156

sequencesChecked.add(seq);

1157

AlignedCodon codon = sequenceCodon.getValue();

1158

if (codon.peptideCol > 1)

1159

{

1160

System.err.println(

1161

"Problem mapping protein with >1 unmapped start positions: "

1162

+ seq.getName());

1163

}

1164

else if (codon.peptideCol == 1)

1165

{

1166

1167

* first position (peptideCol == 0) was unmapped - add it

1168

1169

if (lastCodon != null)

1170

{

1171

AlignedCodon firstPeptide = new AlignedCodon(lastCodon.pos1,

1172

lastCodon.pos2, lastCodon.pos3,

1173

String.valueOf(seq.getCharAt(0)), 0);

1174

toAdd.put(seq, firstPeptide);

}

else

{

* unmapped residue at start of alignment (no prior column) -

1180

* 'insert' at nominal codon [0, 0, 0]

1181

1182

AlignedCodon firstPeptide = new AlignedCodon(0, 0, 0,

1183

String.valueOf(seq.getCharAt(0)), 0);

1184

toAdd.put(seq, firstPeptide);

1185

}

1186

}

1187

if (sequencesChecked.size() == mappedSequenceCount)

1188

{

1189

// no need to check past first mapped position in all sequences

break;

}

}

1913

lastCodon = entry.getKey();

}

* add any new codons safely after iterating over the map

1198

1199

for (Entry<SequenceI, AlignedCodon> startCodon : toAdd.entrySet())

1200

{

1201

addCodonToMap(alignedCodons, startCodon.getValue(),

1202

startCodon.getKey());

}

}

/**

* Update the aligned protein sequences to match the codon alignments given in

* the map.

* @param protein

* @param alignedCodons

1212

* an ordered map of codon positions (columns), with sequence/peptide

1213

* values present in each column

1214

* @param unmappedProtein

1215

* @return

1216

1217

protected static int alignProteinAs(AlignmentI protein,

1218

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,

1219

List<SequenceI> unmappedProtein)

1220

{

1221

1222

* prefill peptide sequences with gaps

1223

1224

int alignedWidth = alignedCodons.size();

1225

char[] gaps = new char[alignedWidth];

1226

Arrays.fill(gaps, protein.getGapCharacter());

1227

Map<SequenceI, char[]> peptides = new HashMap<>();

1228

for (SequenceI seq : protein.getSequences())

1229

{

1230

if (!unmappedProtein.contains(seq))

1231

{

1232

peptides.put(seq, Arrays.copyOf(gaps, gaps.length));

}

}

* Traverse the codons left to right (as defined by CodonComparator)

1238

* and insert peptides in each column where the sequence is mapped.

1239

* This gives a peptide 'alignment' where residues are aligned if their

1240

* corresponding codons occupy the same columns in the cdna alignment.

1241

1242

int column = 0;

1243

for (AlignedCodon codon : alignedCodons.keySet())

1244

{

1245

1914

final Map<SequenceI, AlignedCodon> columnResidues = alignedCodons

1246

.get(codon);

1247

1914

for (Entry<SequenceI, AlignedCodon> entry : columnResidues.entrySet())

1248

{

1249

10682

char residue = entry.getValue().product.charAt(0);

1250

10682

peptides.get(entry.getKey())[column] = residue;

1251

}

1252

1914

column++;

}

* and finally set the constructed sequences

1257

1258

for (Entry<SequenceI, char[]> entry : peptides.entrySet())

1259

{

1260

entry.getKey().setSequence(new String(entry.getValue()));

}

return 0;

}

/**

* Populate the map of aligned codons by traversing the given sequence

1268

* mapping, locating the aligned positions of mapped codons, and adding those

1269

* positions and their translation products to the map.

1270

1271

* @param dna

1272

* the aligned sequence we are mapping from

1273

* @param protein

1274

* the sequence to be aligned to the codons

1275

* @param gapChar

1276

* the gap character in the dna sequence

1277

* @param seqMap

1278

* a mapping to a sequence translation

1279

* @param alignedCodons

1280

* the map we are building up

1281

1282

static void addCodonPositions(SequenceI dna, SequenceI protein,

1283

char gapChar, Mapping seqMap,

1284

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons)

1285

{

1286

Iterator<AlignedCodon> codons = seqMap.getCodonIterator(dna, gapChar);

1287

1288

1289

* add codon positions, and their peptide translations, to the alignment

1290

* map, while remembering the first codon mapped

1291

1292

10716

while (codons.hasNext())

1293

{

1294

10684

try

1295

{

1296

10684

AlignedCodon codon = codons.next();

1297

10684

addCodonToMap(alignedCodons, codon, protein);

1298

} catch (IncompleteCodonException e)

1299

{

1300

// possible incomplete trailing codon - ignore

1301

} catch (NoSuchElementException e)

1302

{

1303

// possibly peptide lacking STOP

}

}

}

/**

* Helper method to add a codon-to-peptide entry to the aligned codons map

1310

1311

* @param alignedCodons

* @param codon

* @param protein

10690

protected static void addCodonToMap(

1316

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,

1317

AlignedCodon codon, SequenceI protein)

1318

{

1319

10690

Map<SequenceI, AlignedCodon> seqProduct = alignedCodons.get(codon);

1320

10690

if (seqProduct == null)

1321

{

1322

1914

seqProduct = new HashMap<>();

1323

1914

alignedCodons.put(codon, seqProduct);

1324

}

1325

10690

seqProduct.put(protein, codon);

}

/**

* Returns true if a cDNA/Protein mapping either exists, or could be made,

1330

* between at least one pair of sequences in the two alignments. Currently,

1331

* the logic is:

1332

* <ul>

1333

* <li>One alignment must be nucleotide, and the other protein</li>

1334

* <li>At least one pair of sequences must be already mapped, or mappable</li>

1335

* <li>Mappable means the nucleotide translation matches the protein

1336

* sequence</li>

1337

* <li>The translation may ignore start and stop codons if present in the

* nucleotide</li>

* </ul>

* @param al1

* @param al2

* @return

public static boolean isMappable(AlignmentI al1, AlignmentI al2)

1346

{

1347

if (al1 == null || al2 == null)

{

return false;

}

* Require one nucleotide and one protein

1354

1355

if (al1.isNucleotide() == al2.isNucleotide())

{

return false;

}

AlignmentI dna = al1.isNucleotide() ? al1 : al2;

1360

AlignmentI protein = dna == al1 ? al2 : al1;

1361

List<AlignedCodonFrame> mappings = protein.getCodonFrames();

1362

for (SequenceI dnaSeq : dna.getSequences())

1363

{

1364

for (SequenceI proteinSeq : protein.getSequences())

1365

{

1366

if (isMappable(dnaSeq, proteinSeq, mappings))

{

return true;

}

}

}

return false;

}

/**

* Returns true if the dna sequence is mapped, or could be mapped, to the

* protein sequence.

* @param dnaSeq

* @param proteinSeq

* @param mappings

* @return

protected static boolean isMappable(SequenceI dnaSeq,

1385

SequenceI proteinSeq, List<AlignedCodonFrame> mappings)

1386

{

1387

if (dnaSeq == null || proteinSeq == null)

{

return false;

}

SequenceI dnaDs = dnaSeq.getDatasetSequence() == null ? dnaSeq

1393

: dnaSeq.getDatasetSequence();

1394

SequenceI proteinDs = proteinSeq.getDatasetSequence() == null

1395

? proteinSeq

1396

: proteinSeq.getDatasetSequence();

1397

1398

for (AlignedCodonFrame mapping : mappings)

1399

{

1400

if (proteinDs == mapping.getAaForDnaSeq(dnaDs))

{

* already mapped

return true;

}

}

* Just try to make a mapping (it is not yet stored), test whether

1411

* successful.

1412

1413

return mapCdnaToProtein(proteinDs, dnaDs) != null;

}

/**

* Finds any reference annotations associated with the sequences in

1418

* sequenceScope, that are not already added to the alignment, and adds them

1419

* to the 'candidates' map. Also populates a lookup table of annotation

1420

* labels, keyed by calcId, for use in constructing tooltips or the like.

1421

1422

* @param sequenceScope

1423

* the sequences to scan for reference annotations

1424

* @param labelForCalcId

1425

* (optional) map to populate with label for calcId

1426

* @param candidates

1427

* map to populate with annotations for sequence

1428

* @param al

1429

* the alignment to check for presence of annotations

1430

1431

public static void findAddableReferenceAnnotations(

1432

List<SequenceI> sequenceScope, Map<String, String> labelForCalcId,

1433

final Map<SequenceI, List<AlignmentAnnotation>> candidates,

1434

AlignmentI al)

1435

{

1436

if (sequenceScope == null)

{

return;

}

* For each sequence in scope, make a list of any annotations on the

1443

* underlying dataset sequence which are not already on the alignment.

1444

1445

* Add to a map of { alignmentSequence, <List of annotations to add> }

1446

1447

for (SequenceI seq : sequenceScope)

1448

{

1449

SequenceI dataset = seq.getDatasetSequence();

if (dataset == null)

{

continue;

}

AlignmentAnnotation[] datasetAnnotations = dataset.getAnnotation();

1455

if (datasetAnnotations == null)

{

continue;

}

final List<AlignmentAnnotation> result = new ArrayList<>();

1460

for (AlignmentAnnotation dsann : datasetAnnotations)

1461

{

1462

1463

* Find matching annotations on the alignment. If none is found, then

1464

* add this annotation to the list of 'addable' annotations for this

1465

* sequence.

1466

1467

final Iterable<AlignmentAnnotation> matchedAlignmentAnnotations = al

1468

.findAnnotations(seq, dsann.getCalcId(), dsann.label);

1469

if (!matchedAlignmentAnnotations.iterator().hasNext())

1470

{

1471

result.add(dsann);

1472

if (labelForCalcId != null)

1473

{

1474

labelForCalcId.put(dsann.getCalcId(), dsann.label);

}

}

}

* Save any addable annotations for this sequence

1480

1481

if (!result.isEmpty())

1482

{

1483

candidates.put(seq, result);

}

}

}

/**

* Adds annotations to the top of the alignment annotations, in the same order

1490

* as their related sequences.

1491

1492

* @param annotations

1493

* the annotations to add

1494

* @param alignment

1495

* the alignment to add them to

1496

* @param selectionGroup

1497

* current selection group (or null if none)

1498

1499

public static void addReferenceAnnotations(

1500

Map<SequenceI, List<AlignmentAnnotation>> annotations,

1501

final AlignmentI alignment, final SequenceGroup selectionGroup)

1502

{

1503

for (SequenceI seq : annotations.keySet())

1504

{

1505

for (AlignmentAnnotation ann : annotations.get(seq))

1506

{

1507

AlignmentAnnotation copyAnn = new AlignmentAnnotation(ann);

1508

int startRes = 0;

1509

int endRes = ann.annotations.length;

1510

if (selectionGroup != null)

1511

{

1512

startRes = selectionGroup.getStartRes();

1513

endRes = selectionGroup.getEndRes();

1514

}

1515

copyAnn.restrict(startRes, endRes);

1516

1517

1518

* Add to the sequence (sets copyAnn.datasetSequence), unless the

1519

* original annotation is already on the sequence.

1520

1521

if (!seq.hasAnnotation(ann))

1522

{

1523

seq.addAlignmentAnnotation(copyAnn);

1524

}

1525

// adjust for gaps

1526

copyAnn.adjustForAlignment();

1527

// add to the alignment and set visible

1528

alignment.addAnnotation(copyAnn);

1529

copyAnn.visible = true;

}

}

}

/**

* Set visibility of alignment annotations of specified types (labels), for

1536

* specified sequences. This supports controls like "Show all secondary

1537

* structure", "Hide all Temp factor", etc.

1538

1539

* @al the alignment to scan for annotations

1540

* @param types

1541

* the types (labels) of annotations to be updated

1542

* @param forSequences

1543

* if not null, only annotations linked to one of these sequences are

1544

* in scope for update; if null, acts on all sequence annotations

1545

* @param anyType

1546

* if this flag is true, 'types' is ignored (label not checked)

1547

* @param doShow

1548

* if true, set visibility on, else set off

1549

1550

public static void showOrHideSequenceAnnotations(AlignmentI al,

1551

Collection<String> types, List<SequenceI> forSequences,

1552

boolean anyType, boolean doShow)

1553

{

1554

AlignmentAnnotation[] anns = al.getAlignmentAnnotation();

1555

if (anns != null)

1556

{

1557

for (AlignmentAnnotation aa : anns)

1558

{

1559

if (anyType || types.contains(aa.label))

1560

{

1561

if ((aa.sequenceRef != null) && (forSequences == null

1562

|| forSequences.contains(aa.sequenceRef)))

{

aa.visible = doShow;

}

}

}

}

}

/**

* Returns true if either sequence has a cross-reference to the other

* @param seq1

* @param seq2

* @return

public static boolean haveCrossRef(SequenceI seq1, SequenceI seq2)

1579

{

1580

// Note: moved here from class CrossRef as the latter class has dependencies

1581

// not availability to the applet's classpath

1582

return hasCrossRef(seq1, seq2) || hasCrossRef(seq2, seq1);

}

/**

* Returns true if seq1 has a cross-reference to seq2. Currently this assumes

1587

* that sequence name is structured as Source|AccessionId.

* @param seq1

* @param seq2

* @return

108

public static boolean hasCrossRef(SequenceI seq1, SequenceI seq2)

1594

{

1595

108

if (seq1 == null || seq2 == null)

{

return false;

}

100

String name = seq2.getName();

1600

100

final List<DBRefEntry> xrefs = seq1.getDBRefs();

1601

100

if (xrefs != null)

1602

{

1603

for (int ix = 0, nx = xrefs.size(); ix < nx; ix++)

1604

{

1605

DBRefEntry xref = xrefs.get(ix);

1606

String xrefName = xref.getSource() + "|" + xref.getAccessionId();

1607

// case-insensitive test, consistent with DBRefEntry.equalRef()

1608

if (xrefName.equalsIgnoreCase(name))

{

return true;

}

}

}

return false;

}

/**

* Constructs an alignment consisting of the mapped (CDS) regions in the given

1619

* nucleotide sequences, and updates mappings to match. The CDS sequences are

1620

* added to the original alignment's dataset, which is shared by the new

1621

* alignment. Mappings from nucleotide to CDS, and from CDS to protein, are

1622

* added to the alignment dataset.

1623

1624

* @param dna

1625

* aligned nucleotide (dna or cds) sequences

1626

* @param dataset

1627

* the alignment dataset the sequences belong to

1628

* @param products

1629

* (optional) to restrict results to CDS that map to specified

1630

* protein products

1631

* @return an alignment whose sequences are the cds-only parts of the dna

1632

* sequences (or null if no mappings are found)

1633

1634

public static AlignmentI makeCdsAlignment(SequenceI[] dna,

1635

AlignmentI dataset, SequenceI[] products)

1636

{

1637

if (dataset == null || dataset.getDataset() != null)

1638

{

1639

throw new IllegalArgumentException(

1640

"IMPLEMENTATION ERROR: dataset.getDataset() must be null!");

1641

}

1642

List<SequenceI> foundSeqs = new ArrayList<>();

1643

List<SequenceI> cdsSeqs = new ArrayList<>();

1644

List<AlignedCodonFrame> mappings = dataset.getCodonFrames();

1645

HashSet<SequenceI> productSeqs = null;

1646

if (products != null)

1647

{

1648

productSeqs = new HashSet<>();

1649

for (SequenceI seq : products)

1650

{

1651

productSeqs.add(seq.getDatasetSequence() == null ? seq : seq

1652

.getDatasetSequence());

}

}

* Construct CDS sequences from mappings on the alignment dataset.

1658

* The logic is:

1659

* - find the protein product(s) mapped to from each dna sequence

1660

* - if the mapping covers the whole dna sequence (give or take start/stop

1661

* codon), take the dna as the CDS sequence

1662

* - else search dataset mappings for a suitable dna sequence, i.e. one

1663

* whose whole sequence is mapped to the protein

1664

* - if no sequence found, construct one from the dna sequence and mapping

1665

* (and add it to dataset so it is found if this is repeated)

1666

1667

for (SequenceI dnaSeq : dna)

1668

{

1669

SequenceI dnaDss = dnaSeq.getDatasetSequence() == null ? dnaSeq

1670

: dnaSeq.getDatasetSequence();

1671

1672

List<AlignedCodonFrame> seqMappings = MappingUtils

1673

.findMappingsForSequence(dnaSeq, mappings);

1674

for (AlignedCodonFrame mapping : seqMappings)

1675

{

1676

List<Mapping> mappingsFromSequence = mapping

1677

.getMappingsFromSequence(dnaSeq);

1678

1679

for (Mapping aMapping : mappingsFromSequence)

1680

{

1681

MapList mapList = aMapping.getMap();

1682

if (mapList.getFromRatio() == 1)

1683

{

1684

1685

* not a dna-to-protein mapping (likely dna-to-cds)

continue;

}

* skip if mapping is not to one of the target set of proteins

1692

1693

SequenceI proteinProduct = aMapping.getTo();

1694

if (productSeqs != null && !productSeqs.contains(proteinProduct))

{

continue;

}

* try to locate the CDS from the dataset mappings;

1701

* guard against duplicate results (for the case that protein has

1702

* dbrefs to both dna and cds sequences)

1703

1704

SequenceI cdsSeq = findCdsForProtein(mappings, dnaSeq,

1705

seqMappings, aMapping);

1706

if (cdsSeq != null)

1707

{

1708

if (!foundSeqs.contains(cdsSeq))

1709

{

1710

foundSeqs.add(cdsSeq);

1711

SequenceI derivedSequence = cdsSeq.deriveSequence();

1712

cdsSeqs.add(derivedSequence);

1713

if (!dataset.getSequences().contains(cdsSeq))

1714

{

1715

dataset.addSequence(cdsSeq);

}

}

continue;

}

* didn't find mapped CDS sequence - construct it and add

1723

* its dataset sequence to the dataset

1724

1725

cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping,

1726

dataset).deriveSequence();

1727

// cdsSeq has a name constructed as CDS|<dbref>

1728

// <dbref> will be either the accession for the coding sequence,

1729

// marked in the /via/ dbref to the protein product accession

1730

// or it will be the original nucleotide accession.

1731

SequenceI cdsSeqDss = cdsSeq.getDatasetSequence();

cdsSeqs.add(cdsSeq);

* build the mapping from CDS to protein

1737

1738

List<int[]> cdsRange = Collections

1739

.singletonList(new int[]

1740

{ cdsSeq.getStart(),

1741

cdsSeq.getLength() + cdsSeq.getStart() - 1 });

1742

MapList cdsToProteinMap = new MapList(cdsRange,

1743

mapList.getToRanges(), mapList.getFromRatio(),

1744

mapList.getToRatio());

1745

1746

if (!dataset.getSequences().contains(cdsSeqDss))

1747

{

1748

1749

* if this sequence is a newly created one, add it to the dataset

1750

* and made a CDS to protein mapping (if sequence already exists,

1751

* CDS-to-protein mapping _is_ the transcript-to-protein mapping)

1752

1753

dataset.addSequence(cdsSeqDss);

1754

AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();

1755

cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,

cdsToProteinMap);

* guard against duplicating the mapping if repeating this action

1760

1761

if (!mappings.contains(cdsToProteinMapping))

1762

{

1763

mappings.add(cdsToProteinMapping);

}

}

propagateDBRefsToCDS(cdsSeqDss, dnaSeq.getDatasetSequence(),

1768

proteinProduct, aMapping);

1769

1770

* add another mapping from original 'from' range to CDS

1771

1772

AlignedCodonFrame dnaToCdsMapping = new AlignedCodonFrame();

1773

final MapList dnaToCdsMap = new MapList(mapList.getFromRanges(),

1774

cdsRange, 1, 1);

1775

dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeqDss,

1776

dnaToCdsMap);

1777

if (!mappings.contains(dnaToCdsMapping))

1778

{

1779

mappings.add(dnaToCdsMapping);

}

* transfer dna chromosomal loci (if known) to the CDS

1784

* sequence (via the mapping)

1785

1786

final MapList cdsToDnaMap = dnaToCdsMap.getInverse();

1787

transferGeneLoci(dnaSeq, cdsToDnaMap, cdsSeq);

1788

1789

1790

* add DBRef with mapping from protein to CDS

1791

* (this enables Get Cross-References from protein alignment)

1792

* This is tricky because we can't have two DBRefs with the

1793

* same source and accession, so need a different accession for

1794

* the CDS from the dna sequence

1795

1796

1797

// specific use case:

1798

// Genomic contig ENSCHR:1, contains coding regions for ENSG01,

1799

// ENSG02, ENSG03, with transcripts and products similarly named.

1800

// cannot add distinct dbrefs mapping location on ENSCHR:1 to ENSG01

1801

1802

// JBPNote: ?? can't actually create an example that demonstrates we

1803

// need to

1804

// synthesize an xref.

1805

1806

List<DBRefEntry> primrefs = dnaDss.getPrimaryDBRefs();

1807

for (int ip = 0, np = primrefs.size(); ip < np; ip++)

1808

{

1809

DBRefEntry primRef = primrefs.get(ip);

1810

1811

* create a cross-reference from CDS to the source sequence's

1812

* primary reference and vice versa

1813

1814

String source = primRef.getSource();

1815

String version = primRef.getVersion();

1816

DBRefEntry cdsCrossRef = new DBRefEntry(source, source + ":"

1817

+ version, primRef.getAccessionId());

1818

cdsCrossRef.setMap(new Mapping(dnaDss, new MapList(cdsToDnaMap)));

1819

cdsSeqDss.addDBRef(cdsCrossRef);

1820

1821

dnaSeq.addDBRef(new DBRefEntry(source, version, cdsSeq

1822

.getName(), new Mapping(cdsSeqDss, dnaToCdsMap)));

1823

// problem here is that the cross-reference is synthesized -

1824

// cdsSeq.getName() may be like 'CDS|dnaaccession' or

1825

// 'CDS|emblcdsacc'

1826

// assuming cds version same as dna ?!?

1827

1828

DBRefEntry proteinToCdsRef = new DBRefEntry(source, version,

1829

cdsSeq.getName());

1830

1831

proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap

1832

.getInverse()));

1833

proteinProduct.addDBRef(proteinToCdsRef);

1834

}

1835

1836

* transfer any features on dna that overlap the CDS

1837

1838

transferFeatures(dnaSeq, cdsSeq, dnaToCdsMap, null,

1839

SequenceOntologyI.CDS);

}

}

}

AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs

1845

.size()]));

1846

cds.setDataset(dataset);

return cds;

}

/**

* Tries to transfer gene loci (dbref to chromosome positions) from fromSeq to

1853

* toSeq, mediated by the given mapping between the sequences

1854

1855

* @param fromSeq

1856

* @param targetToFrom

* Map

* @param targetSeq

protected static void transferGeneLoci(SequenceI fromSeq,

1861

MapList targetToFrom, SequenceI targetSeq)

1862

{

1863

if (targetSeq.getGeneLoci() != null)

1864

{

1865

// already have - don't override

1866

return;

1867

}

1868

GeneLociI fromLoci = fromSeq.getGeneLoci();

1869

if (fromLoci == null)

{

return;

}

MapList newMap = targetToFrom.traverse(fromLoci.getMapping());

if (newMap != null)

{

targetSeq.setGeneLoci(fromLoci.getSpeciesId(),

1879

fromLoci.getAssemblyId(), fromLoci.getChromosomeId(), newMap);

}

}

/**

* A helper method that finds a CDS sequence in the alignment dataset that is

1885

* mapped to the given protein sequence, and either is, or has a mapping from,

1886

* the given dna sequence.

1887

1888

* @param mappings

1889

* set of all mappings on the dataset

1890

* @param dnaSeq

1891

* a dna (or cds) sequence we are searching from

1892

* @param seqMappings

1893

* the set of mappings involving dnaSeq

1894

* @param aMapping

1895

* a transcript-to-peptide mapping

1896

* @return

1897

1898

static SequenceI findCdsForProtein(List<AlignedCodonFrame> mappings,

1899

SequenceI dnaSeq, List<AlignedCodonFrame> seqMappings,

Mapping aMapping)

{

* TODO a better dna-cds-protein mapping data representation to allow easy

1904

* navigation; until then this clunky looping around lists of mappings

1905

1906

SequenceI seqDss = dnaSeq.getDatasetSequence() == null ? dnaSeq

1907

: dnaSeq.getDatasetSequence();

1908

SequenceI proteinProduct = aMapping.getTo();

1909

1910

1911

* is this mapping from the whole dna sequence (i.e. CDS)?

1912

* allowing for possible stop codon on dna but not peptide

1913

1914

int mappedFromLength = MappingUtils

1915

.getLength(aMapping.getMap().getFromRanges());

1916

int dnaLength = seqDss.getLength();

1917

if (mappedFromLength == dnaLength

1918

|| mappedFromLength == dnaLength - CODON_LENGTH)

1919

{

1920

1921

* if sequence has CDS features, this is a transcript with no UTR

1922

* - do not take this as the CDS sequence! (JAL-2789)

1923

1924

if (seqDss.getFeatures().getFeaturesByOntology(SequenceOntologyI.CDS)

.isEmpty())

{

return seqDss;

}

}

* looks like we found the dna-to-protein mapping; search for the

1933

* corresponding cds-to-protein mapping

1934

1935

List<AlignedCodonFrame> mappingsToPeptide = MappingUtils

1936

.findMappingsForSequence(proteinProduct, mappings);

1937

for (AlignedCodonFrame acf : mappingsToPeptide)

1938

{

1939

for (SequenceToSequenceMapping map : acf.getMappings())

1940

{

1941

253

Mapping mapping = map.getMapping();

1942

253

if (mapping != aMapping

1943

&& mapping.getMap().getFromRatio() == CODON_LENGTH

1944

&& proteinProduct == mapping.getTo()

1945

&& seqDss != map.getFromSeq())

1946

{

1947

mappedFromLength = MappingUtils

1948

.getLength(mapping.getMap().getFromRanges());

1949

if (mappedFromLength == map.getFromSeq().getLength())

1950

{

1951

1952

* found a 3:1 mapping to the protein product which covers

1953

* the whole dna sequence i.e. is from CDS; finally check the CDS

1954

* is mapped from the given dna start sequence

1955

1956

SequenceI cdsSeq = map.getFromSeq();

1957

// todo this test is weak if seqMappings contains multiple mappings;

1958

// we get away with it if transcript:cds relationship is 1:1

1959

List<AlignedCodonFrame> dnaToCdsMaps = MappingUtils

1960

.findMappingsForSequence(cdsSeq, seqMappings);

1961

if (!dnaToCdsMaps.isEmpty())

{

return cdsSeq;

}

}

}

}

}

return null;

}

/**

* Helper method that makes a CDS sequence as defined by the mappings from the

1974

* given sequence i.e. extracts the 'mapped from' ranges (which may be on

1975

* forward or reverse strand).

* @param seq

* @param mapping

* @param dataset

* - existing dataset. We check for sequences that look like the CDS

1981

* we are about to construct, if one exists already, then we will

1982

* just return that one.

1983

* @return CDS sequence (as a dataset sequence)

1984

1985

static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping,

AlignmentI dataset)

{

* construct CDS sequence name as "CDS|" with 'from id' held in the mapping

1990

* if set (e.g. EMBL protein_id), else sequence name appended

1991

1992

String mapFromId = mapping.getMappedFromId();

1993

final String seqId = "CDS|"

1994

+ (mapFromId != null ? mapFromId : seq.getName());

1995

1996

SequenceI newSeq = null;

1997

1998

final MapList maplist = mapping.getMap();

1999

if (maplist.isContiguous() && maplist.isFromForwardStrand())

2000

{

2001

2002

* just a subsequence, keep same dataset sequence

2003

2004

int start = maplist.getFromLowest();

2005

int end = maplist.getFromHighest();

2006

newSeq = seq.getSubSequence(start - 1, end);

2007

newSeq.setName(seqId);

}

else

{

* construct by splicing mapped from ranges

2013

2014

char[] seqChars = seq.getSequence();

2015

List<int[]> fromRanges = maplist.getFromRanges();

2016

int cdsWidth = MappingUtils.getLength(fromRanges);

2017

char[] newSeqChars = new char[cdsWidth];

2018

2019

int newPos = 0;

2020

for (int[] range : fromRanges)

2021

{

2022

if (range[0] <= range[1])

2023

{

2024

// forward strand mapping - just copy the range

2025

int length = range[1] - range[0] + 1;

2026

System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos,

length);

newPos += length;

}

else

{

// reverse strand mapping - copy and complement one by one

2033

for (int i = range[0]; i >= range[1]; i--)

2034

{

2035

newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]);

}

}

}

newSeq = new Sequence(seqId, newSeqChars, 1, newPos);

}

if (dataset != null)

{

SequenceI[] matches = dataset.findSequenceMatch(newSeq.getName());

2046

if (matches != null)

2047

{

2048

boolean matched = false;

2049

for (SequenceI mtch : matches)

2050

{

2051

if (mtch.getStart() != newSeq.getStart())

{

continue;

}

if (mtch.getEnd() != newSeq.getEnd())

{

continue;

}

if (!Arrays.equals(mtch.getSequence(), newSeq.getSequence()))

{

continue;

}

if (!matched)

{

matched = true;

newSeq = mtch;

}

else

{

System.err.println(

"JAL-2154 regression: warning - found (and ignnored a duplicate CDS sequence):"

+ mtch.toString());

}

}

}

}

// newSeq.setDescription(mapFromId);

return newSeq;

}

/**

* Adds any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to

* the given mapping.

* @param cdsSeq

* @param contig

* @param proteinProduct

2089

* @param mapping

2090

* @return list of DBRefEntrys added

2091

2092

protected static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,

2093

SequenceI contig, SequenceI proteinProduct, Mapping mapping)

2094

{

2095

2096

// gather direct refs from contig congruent with mapping

2097

List<DBRefEntry> direct = new ArrayList<>();

2098

HashSet<String> directSources = new HashSet<>();

2099

2100

List<DBRefEntry> refs = contig.getDBRefs();

2101

if (refs != null)

2102

{

2103

600

for (int ib = 0, nb = refs.size(); ib < nb; ib++)

2104

{

2105

576

DBRefEntry dbr = refs.get(ib);

2106

576

MapList map;

2107

if (dbr.hasMap() && (map = dbr.getMap().getMap()).isTripletMap())

2108

{

2109

// check if map is the CDS mapping

2110

if (mapping.getMap().equals(map))

2111

{

2112

direct.add(dbr);

2113

directSources.add(dbr.getSource());

}

}

}

}

List<DBRefEntry> onSource = DBRefUtils.selectRefs(

2119

proteinProduct.getDBRefs(),

2120

directSources.toArray(new String[0]));

2121

List<DBRefEntry> propagated = new ArrayList<>();

2122

2123

// and generate appropriate mappings

2124

for (int ic = 0, nc = direct.size(); ic < nc; ic++)

2125

{

2126

DBRefEntry cdsref = direct.get(ic);

2127

Mapping m = cdsref.getMap();

2128

// clone maplist and mapping

2129

MapList cdsposmap = new MapList(

2130

Arrays.asList(new int[][]

2131

{ new int[] { cdsSeq.getStart(), cdsSeq.getEnd() } }),

2132

m.getMap().getToRanges(), 3, 1);

2133

Mapping cdsmap = new Mapping(m.getTo(), m.getMap());

2134

2135

// create dbref

2136

DBRefEntry newref = new DBRefEntry(cdsref.getSource(),

2137

cdsref.getVersion(), cdsref.getAccessionId(),

2138

new Mapping(cdsmap.getTo(), cdsposmap));

2139

2140

// and see if we can map to the protein product for this mapping.

2141

// onSource is the filtered set of accessions on protein that we are

2142

// tranferring, so we assume accession is the same.

2143

if (cdsmap.getTo() == null && onSource != null)

2144

{

2145

List<DBRefEntry> sourceRefs = DBRefUtils.searchRefs(onSource,

2146

cdsref.getAccessionId());

2147

if (sourceRefs != null)

2148

{

2149

for (DBRefEntry srcref : sourceRefs)

2150

{

2151

if (srcref.getSource().equalsIgnoreCase(cdsref.getSource()))

2152

{

2153

// we have found a complementary dbref on the protein product, so

2154

// update mapping's getTo

2155

newref.getMap().setTo(proteinProduct);

}

}

}

}

cdsSeq.addDBRef(newref);

2161

propagated.add(newref);

}

return propagated;

}

/**

* Transfers co-located features on 'fromSeq' to 'toSeq', adjusting the

2168

* feature start/end ranges, optionally omitting specified feature types.

2169

* Returns the number of features copied.

* @param fromSeq

* @param toSeq

* @param mapping

* the mapping from 'fromSeq' to 'toSeq'

2175

* @param select

2176

* if not null, only features of this type are copied (including

2177

* subtypes in the Sequence Ontology)

2178

* @param omitting

2179

2180

protected static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,

2181

MapList mapping, String select, String... omitting)

2182

{

2183

SequenceI copyTo = toSeq;

2184

while (copyTo.getDatasetSequence() != null)

2185

{

2186

copyTo = copyTo.getDatasetSequence();

2187

}

2188

if (fromSeq == copyTo || fromSeq.getDatasetSequence() == copyTo)

2189

{

2190

return 0; // shared dataset sequence

}

* get features, optionally restricted by an ontology term

2195

2196

List<SequenceFeature> sfs = select == null ? fromSeq.getFeatures()

2197

.getPositionalFeatures() : fromSeq.getFeatures()

2198

.getFeaturesByOntology(select);

2199

2200

int count = 0;

2201

for (SequenceFeature sf : sfs)

2202

{

2203

String type = sf.getType();

2204

boolean omit = false;

2205

for (String toOmit : omitting)

2206

{

2207

if (type.equals(toOmit))

{

omit = true;

}

}

if (omit)

{

continue;

}

* locate the mapped range - null if either start or end is

2219

* not mapped (no partial overlaps are calculated)

2220

2221

int start = sf.getBegin();

2222

int end = sf.getEnd();

2223

int[] mappedTo = mapping.locateInTo(start, end);

2224

2225

* if whole exon range doesn't map, try interpreting it

2226

* as 5' or 3' exon overlapping the CDS range

2227

2228

if (mappedTo == null)

2229

{

2230

mappedTo = mapping.locateInTo(end, end);

2231

if (mappedTo != null)

2232

{

2233

2234

* end of exon is in CDS range - 5' overlap

2235

* to a range from the start of the peptide

mappedTo[0] = 1;

}

}

if (mappedTo == null)

2241

{

2242

mappedTo = mapping.locateInTo(start, start);

2243

if (mappedTo != null)

2244

{

2245

2246

* start of exon is in CDS range - 3' overlap

2247

* to a range up to the end of the peptide

2248

2249

mappedTo[1] = toSeq.getLength();

2250

}

2251

}

2252

if (mappedTo != null)

2253

{

2254

int newBegin = Math.min(mappedTo[0], mappedTo[1]);

2255

int newEnd = Math.max(mappedTo[0], mappedTo[1]);

2256

SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd,

2257

sf.getFeatureGroup(), sf.getScore());

2258

copyTo.addSequenceFeature(copy);

count++;

}

}

return count;

}

/**

* Returns a mapping from dna to protein by inspecting sequence features of

2267

* type "CDS" on the dna. A mapping is constructed if the total CDS feature

2268

* length is 3 times the peptide length (optionally after dropping a trailing

2269

* stop codon). This method does not check whether the CDS nucleotide sequence

2270

* translates to the peptide sequence.

* @param dnaSeq

* @param proteinSeq

* @return

public static MapList mapCdsToProtein(SequenceI dnaSeq,

2277

SequenceI proteinSeq)

2278

{

2279

List<int[]> ranges = findCdsPositions(dnaSeq);

2280

int mappedDnaLength = MappingUtils.getLength(ranges);

2281

2282

2283

* if not a whole number of codons, truncate mapping

2284

2285

int codonRemainder = mappedDnaLength % CODON_LENGTH;

2286

if (codonRemainder > 0)

2287

{

2288

mappedDnaLength -= codonRemainder;

2289

MappingUtils.removeEndPositions(codonRemainder, ranges);

2290

}

2291

2292

int proteinLength = proteinSeq.getLength();

2293

int proteinStart = proteinSeq.getStart();

2294

int proteinEnd = proteinSeq.getEnd();

2295

2296

2297

* incomplete start codon may mean X at start of peptide

2298

* we ignore both for mapping purposes

2299

2300

if (proteinSeq.getCharAt(0) == 'X')

2301

{

2302

// todo JAL-2022 support startPhase > 0

proteinStart++;

proteinLength--;

}

List<int[]> proteinRange = new ArrayList<>();

2307

2308

2309

* dna length should map to protein (or protein plus stop codon)

2310

2311

int codesForResidues = mappedDnaLength / CODON_LENGTH;

2312

if (codesForResidues == (proteinLength + 1))

2313

{

2314

// assuming extra codon is for STOP and not in peptide

2315

// todo: check trailing codon is indeed a STOP codon

2316

codesForResidues--;

2317

mappedDnaLength -= CODON_LENGTH;

2318

MappingUtils.removeEndPositions(CODON_LENGTH, ranges);

2319

}

2320

2321

if (codesForResidues == proteinLength)

2322

{

2323

proteinRange.add(new int[] { proteinStart, proteinEnd });

2324

return new MapList(ranges, proteinRange, CODON_LENGTH, 1);

}

return null;

}

/**

* Returns a list of CDS ranges found (as sequence positions base 1), i.e. of

2331

* [start, end] positions of sequence features of type "CDS" (or a sub-type of

2332

* CDS in the Sequence Ontology). The ranges are sorted into ascending start

2333

* position order, so this method is only valid for linear CDS in the same

2334

* sense as the protein product.

* @param dnaSeq

* @return

protected static List<int[]> findCdsPositions(SequenceI dnaSeq)

2340

{

2341

List<int[]> result = new ArrayList<>();

2342

2343

List<SequenceFeature> sfs = dnaSeq.getFeatures().getFeaturesByOntology(

2344

SequenceOntologyI.CDS);

if (sfs.isEmpty())

{

return result;

}

SequenceFeatures.sortFeatures(sfs, true);

2350

2351

for (SequenceFeature sf : sfs)

{

int phase = 0;

try

{

String s = sf.getPhase();

2357

if (s != null)

2358

{

2359

phase = Integer.parseInt(s);

2360

}

2361

} catch (NumberFormatException e)

{

// leave as zero

}

* phase > 0 on first codon means 5' incomplete - skip to the start

2367

* of the next codon; example ENST00000496384

2368

2369

int begin = sf.getBegin();

2370

int end = sf.getEnd();

2371

if (result.isEmpty() && phase > 0)

{

begin += phase;

if (begin > end)

{

// shouldn't happen!

System.err

.println("Error: start phase extends beyond start CDS in "

+ dnaSeq.getName());

}

}

result.add(new int[] { begin, end });

}

* Finally sort ranges by start position. This avoids a dependency on

2387

* keeping features in order on the sequence (if they are in order anyway,

2388

* the sort will have almost no work to do). The implicit assumption is CDS

2389

* ranges are assembled in order. Other cases should not use this method,

2390

* but instead construct an explicit mapping for CDS (e.g. EMBL parsing).

2391

2392

Collections.sort(result, IntRangeComparator.ASCENDING);

return result;

}

/**

* Makes an alignment with a copy of the given sequences, adding in any

2398

* non-redundant sequences which are mapped to by the cross-referenced

* sequences.

* @param seqs

* @param xrefs

* @param dataset

* the alignment dataset shared by the new copy

2405

* @return

2406

2407

public static AlignmentI makeCopyAlignment(SequenceI[] seqs,

2408

SequenceI[] xrefs, AlignmentI dataset)

2409

{

2410

AlignmentI copy = new Alignment(new Alignment(seqs));

2411

copy.setDataset(dataset);

2412

boolean isProtein = !copy.isNucleotide();

2413

SequenceIdMatcher matcher = new SequenceIdMatcher(seqs);

2414

if (xrefs != null)

2415

{

2416

// BH 2019.01.25 recoded to remove iterators

2417

2418

for (int ix = 0, nx = xrefs.length; ix < nx; ix++)

2419

{

2420

SequenceI xref = xrefs[ix];

2421

List<DBRefEntry> dbrefs = xref.getDBRefs();

2422

if (dbrefs != null)

2423

{

2424

for (int ir = 0, nir = dbrefs.size(); ir < nir; ir++)

2425

{

2426

DBRefEntry dbref = dbrefs.get(ir);

2427

Mapping map = dbref.getMap();

2428

SequenceI mto;

2429

if (map == null || (mto = map.getTo()) == null

2430

|| mto.isProtein() != isProtein)

{

continue;

}

SequenceI mappedTo = mto;

2435

SequenceI match = matcher.findIdMatch(mappedTo);

2436

if (match == null)

2437

{

2438

matcher.add(mappedTo);

2439

copy.addSequence(mappedTo);

}

}

}

}

}

return copy;

}

/**

* Try to align sequences in 'unaligned' to match the alignment of their

2450

* mapped regions in 'aligned'. For example, could use this to align CDS

2451

* sequences which are mapped to their parent cDNA sequences.

2452

2453

* This method handles 1:1 mappings (dna-to-dna or protein-to-protein). For

2454

* dna-to-protein or protein-to-dna use alternative methods.

2455

2456

* @param unaligned

2457

* sequences to be aligned

2458

* @param aligned

2459

* holds aligned sequences and their mappings

2460

* @return

2461

2462

public static int alignAs(AlignmentI unaligned, AlignmentI aligned)

2463

{

2464

2465

* easy case - aligning a copy of aligned sequences

2466

2467

if (alignAsSameSequences(unaligned, aligned))

2468

{

2469

return unaligned.getHeight();

}

* fancy case - aligning via mappings between sequences

2474

2475

List<SequenceI> unmapped = new ArrayList<>();

2476

Map<Integer, Map<SequenceI, Character>> columnMap = buildMappedColumnsMap(

2477

unaligned, aligned, unmapped);

2478

int width = columnMap.size();

2479

char gap = unaligned.getGapCharacter();

2480

int realignedCount = 0;

2481

// TODO: verify this loop scales sensibly for very wide/high alignments

2482

2483

for (SequenceI seq : unaligned.getSequences())

2484

{

2485

if (!unmapped.contains(seq))

2486

{

2487

char[] newSeq = new char[width];

2488

Arrays.fill(newSeq, gap); // JBPComment - doubt this is faster than the

2489

// Integer iteration below

int newCol = 0;

int lastCol = 0;

* traverse the map to find columns populated

2495

* by our sequence

2496

2497

for (Integer column : columnMap.keySet())

2498

{

2499

Character c = columnMap.get(column).get(seq);

if (c != null)

{

* sequence has a character at this position

newSeq[newCol] = c;

lastCol = newCol;

}

newCol++;

}

* trim trailing gaps

if (lastCol < width)

{

char[] tmp = new char[lastCol + 1];

2518

System.arraycopy(newSeq, 0, tmp, 0, lastCol + 1);

2519

newSeq = tmp;

2520

}

2521

// TODO: optimise SequenceI to avoid char[]->String->char[]

2522

seq.setSequence(String.valueOf(newSeq));

realignedCount++;

}

}

return realignedCount;

}

/**

* If unaligned and aligned sequences share the same dataset sequences, then

2531

* simply copies the aligned sequences to the unaligned sequences and returns

2532

* true; else returns false

2533

2534

* @param unaligned

2535

* - sequences to be aligned based on aligned

2536

* @param aligned

2537

* - 'guide' alignment containing sequences derived from same

2538

* dataset as unaligned

2539

* @return

2540

2541

static boolean alignAsSameSequences(AlignmentI unaligned,

2542

AlignmentI aligned)

2543

{

2544

if (aligned.getDataset() == null || unaligned.getDataset() == null)

2545

{

2546

return false; // should only pass alignments with datasets here

2547

}

2548

2549

// map from dataset sequence to alignment sequence(s)

2550

Map<SequenceI, List<SequenceI>> alignedDatasets = new HashMap<>();

2551

for (SequenceI seq : aligned.getSequences())

2552

{

2553

SequenceI ds = seq.getDatasetSequence();

2554

if (alignedDatasets.get(ds) == null)

2555

{

2556

alignedDatasets.put(ds, new ArrayList<SequenceI>());

2557

}

2558

alignedDatasets.get(ds).add(seq);

}

* first pass - check whether all sequences to be aligned share a

2563

* dataset sequence with an aligned sequence; also note the leftmost

2564

* ungapped column from which to copy

2565

2566

int leftmost = Integer.MAX_VALUE;

2567

for (SequenceI seq : unaligned.getSequences())

2568

{

2569

final SequenceI ds = seq.getDatasetSequence();

2570

if (!alignedDatasets.containsKey(ds))

{

return false;

}

SequenceI alignedSeq = alignedDatasets.get(ds)

2575

.get(0);

2576

int startCol = alignedSeq.findIndex(seq.getStart()); // 1..

2577

leftmost = Math.min(leftmost, startCol);

}

* second pass - copy aligned sequences;

2582

* heuristic rule: pair off sequences in order for the case where

2583

* more than one shares the same dataset sequence

2584

2585

final char gapCharacter = aligned.getGapCharacter();

2586

for (SequenceI seq : unaligned.getSequences())

2587

{

2588

List<SequenceI> alignedSequences = alignedDatasets

2589

.get(seq.getDatasetSequence());

2590

if (alignedSequences.isEmpty())

2591

{

2592

2593

* defensive check - shouldn't happen! (JAL-3536)

continue;

}

SequenceI alignedSeq = alignedSequences.get(0);

2598

2599

2600

* gap fill for leading (5') UTR if any

2601

2602

// TODO this copies intron columns - wrong!

2603

int startCol = alignedSeq.findIndex(seq.getStart()); // 1..

2604

int endCol = alignedSeq.findIndex(seq.getEnd());

2605

char[] seqchars = new char[endCol - leftmost + 1];

2606

Arrays.fill(seqchars, gapCharacter);

2607

char[] toCopy = alignedSeq.getSequence(startCol - 1, endCol);

2608

System.arraycopy(toCopy, 0, seqchars, startCol - leftmost,

2609

toCopy.length);

2610

seq.setSequence(String.valueOf(seqchars));

2611

if (alignedSequences.size() > 0)

2612

{

2613

// pop off aligned sequences (except the last one)

2614

alignedSequences.remove(0);

}

}

* finally remove gapped columns (e.g. introns)

2620

2621

new RemoveGapColCommand("", unaligned.getSequencesArray(), 0,

2622

unaligned.getWidth() - 1, unaligned);

return true;

}

/**

* Returns a map whose key is alignment column number (base 1), and whose

2629

* values are a map of sequence characters in that column.

* @param unaligned

* @param aligned

* @param unmapped

* @return

static SortedMap<Integer, Map<SequenceI, Character>> buildMappedColumnsMap(

2637

AlignmentI unaligned, AlignmentI aligned,

2638

List<SequenceI> unmapped)

2639

{

2640

2641

* Map will hold, for each aligned column position, a map of

2642

* {unalignedSequence, characterPerSequence} at that position.

2643

* TreeMap keeps the entries in ascending column order.

2644

2645

SortedMap<Integer, Map<SequenceI, Character>> map = new TreeMap<>();

2646

2647

2648

* record any sequences that have no mapping so can't be realigned

2649

2650

unmapped.addAll(unaligned.getSequences());

2651

2652

List<AlignedCodonFrame> mappings = aligned.getCodonFrames();

2653

2654

for (SequenceI seq : unaligned.getSequences())

2655

{

2656

for (AlignedCodonFrame mapping : mappings)

2657

{

2658

SequenceI fromSeq = mapping.findAlignedSequence(seq, aligned);

2659

if (fromSeq != null)

2660

{

2661

Mapping seqMap = mapping.getMappingBetween(fromSeq, seq);

2662

if (addMappedPositions(seq, fromSeq, seqMap, map))

2663

{

2664

unmapped.remove(seq);

}

}

}

}

return map;

}

/**

* Helper method that adds to a map the mapped column positions of a sequence.

2674

* <br>

2675

* For example if aaTT-Tg-gAAA is mapped to TTTAAA then the map should record

2676

* that columns 3,4,6,10,11,12 map to characters T,T,T,A,A,A of the mapped to

* sequence.

* @param seq

* the sequence whose column positions we are recording

2681

* @param fromSeq

2682

* a sequence that is mapped to the first sequence

2683

* @param seqMap

2684

* the mapping from 'fromSeq' to 'seq'

2685

* @param map

2686

* a map to add the column positions (in fromSeq) of the mapped

* positions of seq

* @return

static boolean addMappedPositions(SequenceI seq, SequenceI fromSeq,

2691

Mapping seqMap, Map<Integer, Map<SequenceI, Character>> map)

{

if (seqMap == null)

{

return false;

}

* invert mapping if it is from unaligned to aligned sequence

2700

2701

if (seqMap.getTo() == fromSeq.getDatasetSequence())

2702

{

2703

seqMap = new Mapping(seq.getDatasetSequence(),

2704

seqMap.getMap().getInverse());

2705

}

2706

2707

int toStart = seq.getStart();

2708

2709

2710

* traverse [start, end, start, end...] ranges in fromSeq

2711

2712

for (int[] fromRange : seqMap.getMap().getFromRanges())

2713

{

2714

for (int i = 0; i < fromRange.length - 1; i += 2)

2715

{

2716

boolean forward = fromRange[i + 1] >= fromRange[i];

2717

2718

2719

* find the range mapped to (sequence positions base 1)

2720

2721

int[] range = seqMap.locateMappedRange(fromRange[i],

fromRange[i + 1]);

if (range == null)

{

System.err.println("Error in mapping " + seqMap + " from "

2726

+ fromSeq.getName());

2727

return false;

2728

}

2729

int fromCol = fromSeq.findIndex(fromRange[i]);

2730

int mappedCharPos = range[0];

2731

2732

2733

* walk over the 'from' aligned sequence in forward or reverse

2734

* direction; when a non-gap is found, record the column position

2735

* of the next character of the mapped-to sequence; stop when all

2736

* the characters of the range have been counted

2737

2738

while (mappedCharPos <= range[1] && fromCol <= fromSeq.getLength()

2739

&& fromCol >= 0)

2740

{

2741

if (!Comparison.isGap(fromSeq.getCharAt(fromCol - 1)))

2742

{

2743

2744

* mapped from sequence has a character in this column

2745

* record the column position for the mapped to character

2746

2747

Map<SequenceI, Character> seqsMap = map.get(fromCol);

2748

if (seqsMap == null)

2749

{

2750

seqsMap = new HashMap<>();

2751

map.put(fromCol, seqsMap);

2752

}

2753

seqsMap.put(seq, seq.getCharAt(mappedCharPos - toStart));

2754

mappedCharPos++;

2755

}

2756

fromCol += (forward ? 1 : -1);

}

}

}

return true;

}

// strictly temporary hack until proper criteria for aligning protein to cds

2764

// are in place; this is so Ensembl -> fetch xrefs Uniprot aligns the Uniprot

2765

public static boolean looksLikeEnsembl(AlignmentI alignment)

2766

{

2767

for (SequenceI seq : alignment.getSequences())

2768

{

2769

String name = seq.getName();

2770

if (!name.startsWith("ENSG") && !name.startsWith("ENST"))

{

return false;

}

}

return true;

}

}

Coverage Report

File AlignmentUtils.java

Coverage histogram

Code metrics

Classes

Class AlignmentUtils

Class AlignmentUtils.DnaVariant

Contributing tests

Contributing tests

Source view