File AlignmentUtils.java

Branches:

438

Statements:

889

Methods:

Classes:

LOC:

3,089

NCLOC:

1,868

Total complexity:

319

Complexity density:

0.36

Statements/Method:

18.52

Methods/Class:

Average method complexity:

6.65

Classes

Class	Line #	Total Statements	Complexity	Uncovered Elements	TOTAL Coverage	Actions
AlignmentUtils	75	883	313	214	0.8427626584.3%
AlignmentUtils.DnaVariant	88	6	6	5	0.6428571364.3%

Class AlignmentUtils

Class AlignmentUtils	Line # 75	Total Statements 883	Complexity 313	Uncovered Elements 214	TOTAL Coverage 0.8427626584.3%
expandContext(AlignmentI,int) : AlignmentI expandContext(AlignmentI,int) : AlignmentI	129129	46.046	10.010	1.01	0.983871 0.98387198.4%
getSequenceIndex(AlignmentI,SequenceI) : int getSequenceIndex(AlignmentI,SequenceI) : int	235235	8.08	2.02	0.00	1.0 1.0100%
getSequencesByName(AlignmentI) : Map<String, List<SequenceI>> getSequencesByName(AlignmentI) : Map<String, List<SequenceI>>	258258	10.010	3.03	1.01	0.9285714 0.928571492.9%
mapProteinAlignmentToCdna(AlignmentI,AlignmentI) : boolean mapProteinAlignmentToCdna(AlignmentI,AlignmentI) : boolean	289289	7.07	3.03	2.02	0.7777778 0.777777877.8%
mapProteinToCdna(AlignmentI,AlignmentI,Set<SequenceI>,Set<SequenceI>,boolean) : boolean mapProteinToCdna(AlignmentI,AlignmentI,Set<SequenceI>,Set<SequenceI>,boolean) : boolean	331331	22.022	9.09	2.02	0.9375 0.937593.8%
mappingExists(List<AlignedCodonFrame>,SequenceI,SequenceI) : boolean mappingExists(List<AlignedCodonFrame>,SequenceI,SequenceI) : boolean	398398	5.05	3.03	3.03	0.6666667 0.666666766.7%
mapCdnaToProtein(SequenceI,SequenceI) : MapList mapCdnaToProtein(SequenceI,SequenceI) : MapList	431431	28.028	12.012	2.02	0.95238096 0.9523809695.2%
translatesAs(char[],int,char[]) : boolean translatesAs(char[],int,char[]) : boolean	519519	21.021	14.014	0.00	1.0 1.0100%
alignSequenceAs(SequenceI,AlignmentI,String,boolean,boolean) : boolean alignSequenceAs(SequenceI,AlignmentI,String,boolean,boolean) : boolean	595595	14.014	5.05	2.02	0.9 0.990%
alignSequenceAs(SequenceI,SequenceI,AlignedCodonFrame,String,char,boolean,boolean) : void alignSequenceAs(SequenceI,SequenceI,AlignedCodonFrame,String,char,boolean,boolean) : void	651651	60.060	20.020	0.00	1.0 1.0100%
calculateGapsToInsert(boolean,boolean,int,boolean,int,int,boolean) : int calculateGapsToInsert(boolean,boolean,int,boolean,int,int,boolean) : int	815815	15.015	10.010	0.00	1.0 1.0100%
alignProteinAsDna(AlignmentI,AlignmentI) : int alignProteinAsDna(AlignmentI,AlignmentI) : int	879879	6.06	3.03	3.03	0.625 0.62562.5%
alignCdsAsProtein(AlignmentI,AlignmentI) : int alignCdsAsProtein(AlignmentI,AlignmentI) : int	904904	18.018	5.05	4.04	0.8333333 0.833333383.3%
alignCdsSequenceAsProtein(SequenceI,AlignmentI,List<AlignedCodonFrame>,char) : boolean alignCdsSequenceAsProtein(SequenceI,AlignmentI,List<AlignedCodonFrame>,char) : boolean	949949	48.048	16.016	25.025	0.67105263 0.6710526367.1%
buildCodonColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : Map<AlignedCodon, Map<SequenceI, AlignedCodon>> buildCodonColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : Map<AlignedCodon, Map<SequenceI, AlignedCodon>>	10801080	13.013	2.02	1.01	0.93333334 0.9333333493.3%
addUnmappedPeptideStarts(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,int) : void addUnmappedPeptideStarts(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,int) : void	11361136	23.023	6.06	2.02	0.93939394 0.9393939493.9%
alignProteinAs(AlignmentI,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,List<SequenceI>) : int alignProteinAs(AlignmentI,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,List<SequenceI>) : int	12191219	17.017	2.02	0.00	1.0 1.0100%
addCodonPositions(SequenceI,SequenceI,char,Mapping,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>) : void addCodonPositions(SequenceI,SequenceI,char,Mapping,Map<AlignedCodon, Map<SequenceI, AlignedCodon>>) : void	12841284	5.05	4.04	0.00	1.0 1.0100%
addCodonToMap(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,AlignedCodon,SequenceI) : void addCodonToMap(Map<AlignedCodon, Map<SequenceI, AlignedCodon>>,AlignedCodon,SequenceI) : void	13171317	5.05	2.02	0.00	1.0 1.0100%
isMappable(AlignmentI,AlignmentI) : boolean isMappable(AlignmentI,AlignmentI) : boolean	13471347	12.012	7.07	2.02	0.90909094 0.9090909490.9%
isMappable(SequenceI,SequenceI,List<AlignedCodonFrame>) : boolean isMappable(SequenceI,SequenceI,List<AlignedCodonFrame>) : boolean	13861386	8.08	6.06	8.08	0.5 0.550%
findAddableReferenceAnnotations(List<SequenceI>,Map<String, String>,Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI) : void findAddableReferenceAnnotations(List<SequenceI>,Map<String, String>,Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI) : void	14331433	18.018	7.07	3.03	0.9 0.990%
addReferenceAnnotations(Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI,SequenceGroup) : void addReferenceAnnotations(Map<SequenceI, List<AlignmentAnnotation>>,AlignmentI,SequenceGroup) : void	15011501	14.014	3.03	18.018	0.0 0.00%
showOrHideSequenceAnnotations(AlignmentI,Collection<String>,List<SequenceI>,boolean,boolean) : void showOrHideSequenceAnnotations(AlignmentI,Collection<String>,List<SequenceI>,boolean,boolean) : void	15521552	6.06	7.07	1.01	0.9166667 0.916666791.7%
haveCrossRef(SequenceI,SequenceI) : boolean haveCrossRef(SequenceI,SequenceI) : boolean	15801580	1.01	1.01	0.00	1.0 1.0100%
hasCrossRef(SequenceI,SequenceI) : boolean hasCrossRef(SequenceI,SequenceI) : boolean	15951595	10.010	5.05	0.00	1.0 1.0100%
makeCdsAlignment(SequenceI[],AlignmentI,SequenceI[]) : AlignmentI makeCdsAlignment(SequenceI[],AlignmentI,SequenceI[]) : AlignmentI	16351635	64.064	15.015	21.021	0.7613636 0.761363676.1%
transferGeneLoci(SequenceI,MapList,SequenceI) : void transferGeneLoci(SequenceI,MapList,SequenceI) : void	18561856	8.08	4.04	1.01	0.9285714 0.928571492.9%
findCdsForProtein(List<AlignedCodonFrame>,SequenceI,List<AlignedCodonFrame>,Mapping) : SequenceI findCdsForProtein(List<AlignedCodonFrame>,SequenceI,List<AlignedCodonFrame>,Mapping) : SequenceI	18941894	19.019	11.011	17.017	0.4516129 0.451612945.2%
makeCdsSequence(SequenceI,Mapping,AlignmentI) : SequenceI makeCdsSequence(SequenceI,Mapping,AlignmentI) : SequenceI	19811981	31.031	10.010	19.019	0.6122449 0.612244961.2%
propagateDBRefsToCDS(SequenceI,SequenceI,SequenceI,Mapping) : List<DBRefEntry> propagateDBRefsToCDS(SequenceI,SequenceI,SequenceI,Mapping) : List<DBRefEntry>	20662066	24.024	9.09	4.04	0.8888889 0.888888988.9%
transferFeatures(SequenceI,SequenceI,MapList,String,String) : int transferFeatures(SequenceI,SequenceI,MapList,String,String) : int	21512151	31.031	10.010	0.00	1.0 1.0100%
mapCdsToProtein(SequenceI,SequenceI) : MapList mapCdsToProtein(SequenceI,SequenceI) : MapList	22432243	22.022	5.05	0.00	1.0 1.0100%
findCdsPositions(SequenceI) : List<int[]> findCdsPositions(SequenceI) : List<int[]>	23062306	18.018	6.06	2.02	0.9166667 0.916666791.7%
computeProteinFeatures(SequenceI,SequenceI,MapList) : int computeProteinFeatures(SequenceI,SequenceI,MapList) : int	23692369	12.012	3.03	2.02	0.875 0.87587.5%
computePeptideVariants(SequenceI,int,List<DnaVariant>[]) : int computePeptideVariants(SequenceI,int,List<DnaVariant>[]) : int	24252425	36.036	13.013	6.06	0.9 0.990%
addPeptideVariant(SequenceI,int,String,DnaVariant,String,String) : boolean addPeptideVariant(SequenceI,int,String,DnaVariant,String,String) : boolean	25392539	33.033	11.011	4.04	0.92156863 0.9215686392.2%
buildDnaVariantsMap(SequenceI,MapList) : LinkedHashMap<Integer, List<DnaVariant>[]> buildDnaVariantsMap(SequenceI,MapList) : LinkedHashMap<Integer, List<DnaVariant>[]>	26262626	45.045	14.014	6.06	0.9130435 0.913043591.3%
makeCopyAlignment(SequenceI[],SequenceI[],AlignmentI) : AlignmentI makeCopyAlignment(SequenceI[],SequenceI[],AlignmentI) : AlignmentI	27572757	17.017	7.07	25.025	0.0 0.00%
alignAs(AlignmentI,AlignmentI) : int alignAs(AlignmentI,AlignmentI) : int	28062806	26.026	5.05	4.04	0.88235295 0.8823529588.2%
alignAsSameSequences(AlignmentI,AlignmentI) : boolean alignAsSameSequences(AlignmentI,AlignmentI) : boolean	28852885	17.017	6.06	3.03	0.88 0.8888%
buildMappedColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : SortedMap<Integer, Map<SequenceI, Character>> buildMappedColumnsMap(AlignmentI,AlignmentI,List<SequenceI>) : SortedMap<Integer, Map<SequenceI, Character>>	29482948	11.011	3.03	2.02	0.8666667 0.866666786.7%
addMappedPositions(SequenceI,SequenceI,Mapping,Map<Integer, Map<SequenceI, Character>>) : boolean addMappedPositions(SequenceI,SequenceI,Mapping,Map<Integer, Map<SequenceI, Character>>) : boolean	30023002	24.024	11.011	8.08	0.8 0.880%
looksLikeEnsembl(AlignmentI) : boolean looksLikeEnsembl(AlignmentI) : boolean	30773077	5.05	3.03	7.07	0.0 0.00%

Class AlignmentUtils.DnaVariant

Class AlignmentUtils.DnaVariant	Line # 88	Total Statements 6	Complexity 6	Uncovered Elements 5	TOTAL Coverage 0.6428571364.3%
DnaVariant(String) DnaVariant(String)	9494	2.02	1.01	0.00	1.0 1.0100%
DnaVariant(String,SequenceFeature) DnaVariant(String,SequenceFeature)	100100	2.02	1.01	0.00	1.0 1.0100%
getSource() : String getSource() : String	106106	1.01	2.02	1.01	0.6666667 0.666666766.7%
toString() : String toString() : String	114114	1.01	2.02	3.03	0.0 0.00%

Contributing tests

This file is covered by 89 tests.

Contributing tests

Test contribution	Test	Result
0.12436364	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignmentjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment	1PASS
0.117818184	jalview.io.vcf.VCFLoaderTest.testDoLoad_vepCsqjalview.io.vcf.VCFLoaderTest.testDoLoad_vepCsq	1PASS
0.106181815	jalview.io.vcf.VCFLoaderTest.testDoLoad_reverseStrandjalview.io.vcf.VCFLoaderTest.testDoLoad_reverseStrand	1PASS
0.106181815	jalview.io.vcf.VCFLoaderTest.testDoLoadjalview.io.vcf.VCFLoaderTest.testDoLoad	1PASS
0.10690909	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withStartAndStopCodonsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withStartAndStopCodons	1PASS
0.104727276	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_withXrefs	1PASS
0.09309091	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_noXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_noXrefs	1PASS
0.09090909	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_multipleProteinsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_multipleProteins	1PASS
0.09018182	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_filterProductsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_filterProducts	1PASS
0.08581818	jalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_alternativeTranscriptsjalview.analysis.AlignmentUtilsTests.testMakeCdsAlignment_alternativeTranscripts	1PASS
0.08072727	jalview.analysis.CrossRefTest.testFindXrefSequences_forGeneAndTranscriptsjalview.analysis.CrossRefTest.testFindXrefSequences_forGeneAndTranscripts	3FAIL
0.08	jalview.analysis.AlignmentUtilsTests.testComputePeptideVariantsjalview.analysis.AlignmentUtilsTests.testComputePeptideVariants	1PASS
0.07709091	jalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_prioritiseXrefsjalview.analysis.AlignmentUtilsTests.testMapProteinAlignmentToCdna_prioritiseXrefs	1PASS
0.077818185	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withIntronsjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withIntrons	1PASS
0.07490909	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_noIntronsjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_noIntrons	1PASS
0.07054546	jalview.io.FeaturesFileTest.simpleGff3FileClassjalview.io.FeaturesFileTest.simpleGff3FileClass	1PASS
0.07054546	jalview.io.FeaturesFileTest.simpleGff3FileLoaderjalview.io.FeaturesFileTest.simpleGff3FileLoader	1PASS
0.07054546	jalview.io.FeaturesFileTest.readGff3Filejalview.io.FeaturesFileTest.readGff3File	1PASS
0.07054546	jalview.io.FeaturesFileTest.simpleGff3RelaxedIdMatchingjalview.io.FeaturesFileTest.simpleGff3RelaxedIdMatching	1PASS
0.06763636	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_keepIntronGapsOnlyjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_keepIntronGapsOnly	1PASS
0.06690909	jalview.datamodel.AlignmentTest.testAlignAs_dnaAsDnajalview.datamodel.AlignmentTest.testAlignAs_dnaAsDna	1PASS
0.06472727	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withUnmappedProteinjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withMapping_withUnmappedProtein	1PASS
0.06545454	jalview.analysis.AlignmentUtilsTests.testAlignAs_alternateTranscriptsUngappedjalview.analysis.AlignmentUtilsTests.testAlignAs_alternateTranscriptsUngapped	1PASS
0.064	jalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna_incompleteStartCodonjalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna_incompleteStartCodon	1PASS
0.060363635	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withTrailingPeptidejalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_withTrailingPeptide	1PASS
0.05818182	jalview.analysis.AlignmentUtilsTests.testAlignProteinAsDnajalview.analysis.AlignmentUtilsTests.testAlignProteinAsDna	1PASS
0.057454545	jalview.datamodel.AlignmentTest.testAlignAs_proteinAsCdnajalview.datamodel.AlignmentTest.testAlignAs_proteinAsCdna	1PASS
0.05527273	jalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_mappedProteinProteinjalview.analysis.AlignmentUtilsTests.testAlignSequenceAs_mappedProteinProtein	1PASS
0.053090908	jalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProteinjalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProtein	1PASS
0.05381818	jalview.analysis.AlignmentUtilsTests.testIsMappablejalview.analysis.AlignmentUtilsTests.testIsMappable	1PASS
0.051636364	jalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProtein_singleSequencejalview.datamodel.AlignmentTest.testAlignAs_cdnaAsProtein_singleSequence	1PASS
0.048	jalview.analysis.AlignmentUtilsTests.testBuildDnaVariantsMapjalview.analysis.AlignmentUtilsTests.testBuildDnaVariantsMap	1PASS
0.04218182	jalview.analysis.AlignmentUtilsTests.testExpandContextjalview.analysis.AlignmentUtilsTests.testExpandContext	1PASS
0.037818182	jalview.analysis.AlignmentUtilsTests.testMapCdsToProteinjalview.analysis.AlignmentUtilsTests.testMapCdsToProtein	1PASS
0.03418182	jalview.analysis.AlignmentUtilsTests.testExpandContext_annotationjalview.analysis.AlignmentUtilsTests.testExpandContext_annotation	1PASS
0.032	jalview.analysis.AlignmentUtilsTests.testMapCdnaToProtein_forSubsequencejalview.analysis.AlignmentUtilsTests.testMapCdnaToProtein_forSubsequence	1PASS
0.029818181	jalview.analysis.AlignmentUtilsTests.testTransferFeaturesjalview.analysis.AlignmentUtilsTests.testTransferFeatures	1PASS
0.027636364	jalview.analysis.AlignmentUtilsTests.testTranslatesAsjalview.analysis.AlignmentUtilsTests.testTranslatesAs	1PASS
0.024727272	jalview.analysis.AlignmentUtilsTests.testTransferFeatures_withOmitjalview.analysis.AlignmentUtilsTests.testTransferFeatures_withOmit	1PASS
0.023272727	jalview.analysis.AlignmentUtilsTests.testAddMappedPositionsjalview.analysis.AlignmentUtilsTests.testAddMappedPositions	1PASS
0.023272727	jalview.analysis.AlignmentUtilsTests.testAddMappedPositions_withStopCodonjalview.analysis.AlignmentUtilsTests.testAddMappedPositions_withStopCodon	1PASS
0.020363636	jalview.analysis.AlignmentUtilsTests.testTransferFeatures_withSelectjalview.analysis.AlignmentUtilsTests.testTransferFeatures_withSelect	1PASS
0.017454546	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenujalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu	1PASS
0.017454546	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_notOnAlignmentjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_notOnAlignment	1PASS
0.016	jalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesjalview.analysis.AlignmentUtilsTests.testAlignAsSameSequences	1PASS
0.015272727	jalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesMultipleSubSeqjalview.analysis.AlignmentUtilsTests.testAlignAsSameSequencesMultipleSubSeq	1PASS
0.015272727	jalview.analysis.AlignmentUtilsTests.testFindCdsPositions_fivePrimeIncompletejalview.analysis.AlignmentUtilsTests.testFindCdsPositions_fivePrimeIncomplete	1PASS
0.013818182	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_alreadyAddedjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_alreadyAdded	1PASS
0.012363636	jalview.ext.jmol.JmolViewerTest.testAddStrToSingleSeqViewJMoljalview.ext.jmol.JmolViewerTest.testAddStrToSingleSeqViewJMol	1PASS
0.012363636	jalview.analysis.AlignmentUtilsTests.testHasCrossRefjalview.analysis.AlignmentUtilsTests.testHasCrossRef	1PASS
0.012363636	jalview.analysis.AlignmentUtilsTests.testFindCdsPositionsjalview.analysis.AlignmentUtilsTests.testFindCdsPositions	1PASS
0.013090909	jalview.analysis.AlignmentUtilsTests.testHaveCrossRefjalview.analysis.AlignmentUtilsTests.testHaveCrossRef	1PASS
0.010181818	jalview.analysis.AlignmentUtilsTests.testGetSequencesByNamejalview.analysis.AlignmentUtilsTests.testGetSequencesByName	1PASS
0.010181818	jalview.analysis.AlignmentUtilsTests.testTransferGeneLocijalview.analysis.AlignmentUtilsTests.testTransferGeneLoci	1PASS
0.008	jalview.gui.AnnotationChooserTest.testDeselectType_showForSelectedjalview.gui.AnnotationChooserTest.testDeselectType_showForSelected	1PASS
0.008	jalview.gui.AnnotationChooserTest.testSelectType_showForAlljalview.gui.AnnotationChooserTest.testSelectType_showForAll	1PASS
0.008	jalview.gui.AnnotationChooserTest.testDeselectType_showForAlljalview.gui.AnnotationChooserTest.testDeselectType_showForAll	1PASS
0.008	jalview.gui.AlignFrameTest.testNewView_colourThresholdsjalview.gui.AlignFrameTest.testNewView_colourThresholds	1PASS
0.008	jalview.gui.AnnotationChooserTest.testSelectType_hideForSelectedjalview.gui.AnnotationChooserTest.testSelectType_hideForSelected	1PASS
0.008	jalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcFirstjalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcFirst	1PASS
0.008	jalview.gui.AnnotationChooserTest.testResetOriginalStatejalview.gui.AnnotationChooserTest.testResetOriginalState	1PASS
0.008	jalview.analysis.AnnotationSorterTest.testNoSort_autocalcFirstjalview.analysis.AnnotationSorterTest.testNoSort_autocalcFirst	1PASS
0.008	jalview.gui.AnnotationChooserTest.testSelectType_showForSelectedjalview.gui.AnnotationChooserTest.testSelectType_showForSelected	1PASS
0.008	jalview.gui.AlignFrameTest.testChangeColour_background_groupsAndThresholdsjalview.gui.AlignFrameTest.testChangeColour_background_groupsAndThresholds	1PASS
0.008	jalview.gui.AnnotationChooserTest.testDeselectType_hideForSelectedjalview.gui.AnnotationChooserTest.testDeselectType_hideForSelected	1PASS
0.008	jalview.gui.PopupMenuTest.testAddFeatureLinksjalview.gui.PopupMenuTest.testAddFeatureLinks	3FAIL
0.008	jalview.gui.AnnotationChooserTest.testIsInActionScope_selectedScopejalview.gui.AnnotationChooserTest.testIsInActionScope_selectedScope	1PASS
0.008	jalview.gui.AnnotationChooserTest.testBuildApplyToOptionsPanel_withSelectionGroupjalview.gui.AnnotationChooserTest.testBuildApplyToOptionsPanel_withSelectionGroup	1PASS
0.008	jalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcLastjalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcLast	1PASS
0.008	jalview.analysis.AnnotationSorterTest.testSort_timingSemisortedjalview.analysis.AnnotationSorterTest.testSort_timingSemisorted	1PASS
0.008	jalview.ext.jmol.JmolViewerTest.testSingleSeqViewJMoljalview.ext.jmol.JmolViewerTest.testSingleSeqViewJMol	1PASS
0.008	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noReferenceAnnotationsjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noReferenceAnnotations	1PASS
0.008	jalview.analysis.AnnotationSorterTest.testSort_timingPresortedjalview.analysis.AnnotationSorterTest.testSort_timingPresorted	1PASS
0.008	jalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcLastjalview.analysis.AnnotationSorterTest.testSortByTypeAndSequence_autocalcLast	1PASS
0.008	jalview.io.Jalview2xmlTests.testStoreAndRecoverPDBEntryjalview.io.Jalview2xmlTests.testStoreAndRecoverPDBEntry	1PASS
0.008	jalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcFirstjalview.analysis.AnnotationSorterTest.testSortBySequenceAndType_autocalcFirst	1PASS
0.008	jalview.gui.PopupMenuTest.testHideInsertionsjalview.gui.PopupMenuTest.testHideInsertions	1PASS
0.008727273	jalview.analysis.AlignmentUtilsTests.testShowOrHideSequenceAnnotationsjalview.analysis.AlignmentUtilsTests.testShowOrHideSequenceAnnotations	1PASS
0.008	jalview.io.Jalview2xmlTests.testTCoffeeScoresjalview.io.Jalview2xmlTests.testTCoffeeScores	1PASS
0.008	jalview.io.Jalview2xmlTests.testRNAStructureRecoveryjalview.io.Jalview2xmlTests.testRNAStructureRecovery	1PASS
0.008	jalview.analysis.AnnotationSorterTest.testSort_timingUnsortedjalview.analysis.AnnotationSorterTest.testSort_timingUnsorted	1PASS
0.008	jalview.gui.AnnotationChooserTest.testSelectType_hideForAlljalview.gui.AnnotationChooserTest.testSelectType_hideForAll	1PASS
0.008	jalview.gui.AnnotationChooserTest.testIsInActionScope_unselectedScopejalview.gui.AnnotationChooserTest.testIsInActionScope_unselectedScope	1PASS
0.008	jalview.gui.AnnotationChooserTest.testDeselectType_hideForAlljalview.gui.AnnotationChooserTest.testDeselectType_hideForAll	1PASS
0.008	jalview.io.AnnotatedPDBFileInputTest.testJalviewProjectRelocationAnnotationjalview.io.AnnotatedPDBFileInputTest.testJalviewProjectRelocationAnnotation	1PASS
0.008	jalview.io.Jalview2xmlTests.testStoreAndRecoverColourThresholdsjalview.io.Jalview2xmlTests.testStoreAndRecoverColourThresholds	1PASS
0.008	jalview.gui.AnnotationColumnChooserTest.testResetjalview.gui.AnnotationColumnChooserTest.testReset	1PASS
0.008	jalview.io.Jalview2xmlTests.testColourByAnnotScoresjalview.io.Jalview2xmlTests.testColourByAnnotScores	1PASS
0.0043636365	jalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noSequenceSelectedjalview.gui.PopupMenuTest.testConfigureReferenceAnnotationsMenu_noSequenceSelected	1PASS

Source view

/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */

package jalview.analysis;

import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE;

import jalview.datamodel.AlignedCodon;

import jalview.datamodel.AlignedCodonFrame;

import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;

import jalview.datamodel.Alignment;

import jalview.datamodel.AlignmentAnnotation;

import jalview.datamodel.AlignmentI;

import jalview.datamodel.DBRefEntry;

import jalview.datamodel.GeneLociI;

import jalview.datamodel.IncompleteCodonException;

import jalview.datamodel.Mapping;

import jalview.datamodel.Sequence;

import jalview.datamodel.SequenceFeature;

import jalview.datamodel.SequenceGroup;

import jalview.datamodel.SequenceI;

import jalview.datamodel.features.SequenceFeatures;

import jalview.io.gff.Gff3Helper;

import jalview.io.gff.SequenceOntologyI;

import jalview.schemes.ResidueProperties;

import jalview.util.Comparison;

import jalview.util.DBRefUtils;

import jalview.util.IntRangeComparator;

import jalview.util.MapList;

import jalview.util.MappingUtils;

import jalview.util.StringUtils;

import java.io.UnsupportedEncodingException;

import java.net.URLEncoder;

import java.util.ArrayList;

import java.util.Arrays;

import java.util.Collection;

import java.util.Collections;

import java.util.HashMap;

import java.util.HashSet;

import java.util.Iterator;

import java.util.LinkedHashMap;

import java.util.List;

import java.util.Map;

import java.util.Map.Entry;

import java.util.NoSuchElementException;

import java.util.Set;

import java.util.SortedMap;

import java.util.TreeMap;

/**
 * Grab bag of useful alignment manipulation operations. Expect these to be
 * refactored elsewhere at some point.
 *
 * @author jimp
 */

public class AlignmentUtils

{

// number of nucleotide bases in one codon
private static final int CODON_LENGTH = 3;

// NOTE(review): presumably a prefix used when describing sequence variant
// features - the usage is outside this view; confirm
private static final String SEQUENCE_VARIANT = "sequence_variant:";

// NOTE(review): presumably a feature attribute key - the usage is outside
// this view; confirm
private static final String ID = "ID";

/**

* A data model to hold the 'normal' base value at a position, and an optional

* sequence variant feature

static final class DnaVariant

{

final String base;

SequenceFeature variant;

DnaVariant(String nuc)

{

base = nuc;

variant = null;

}

100

DnaVariant(String nuc, SequenceFeature var)

{

base = nuc;

variant = var;

}

public String getSource()

107

{

108

return variant == null ? null : variant.getFeatureGroup();

}

/**

* toString for aid in the debugger only

113

114

@Override

115

public String toString()

116

{

117

return base + ":" + (variant == null ? "" : variant.getDescription());

}

}

/**

* given an existing alignment, create a new alignment including all, or up to

123

* flankSize additional symbols from each sequence's dataset sequence

* @param core

* @param flankSize

* @return AlignmentI

public static AlignmentI expandContext(AlignmentI core, int flankSize)

130

{

131

List<SequenceI> sq = new ArrayList<>();

132

int maxoffset = 0;

133

for (SequenceI s : core.getSequences())

134

{

135

131

SequenceI newSeq = s.deriveSequence();

136

131

final int newSeqStart = newSeq.getStart() - 1;

137

131

if (newSeqStart > maxoffset

138

&& newSeq.getDatasetSequence().getStart() < s.getStart())

139

{

140

131

maxoffset = newSeqStart;

141

}

142

131

sq.add(newSeq);

}

if (flankSize > -1)

{

maxoffset = Math.min(maxoffset, flankSize);

}

* now add offset left and right to create an expanded alignment

151

152

for (SequenceI s : sq)

153

{

154

131

SequenceI ds = s;

155

262

while (ds.getDatasetSequence() != null)

156

{

157

131

ds = ds.getDatasetSequence();

158

}

159

131

int s_end = s.findPosition(s.getStart() + s.getLength());

160

// find available flanking residues for sequence

161

131

int ustream_ds = s.getStart() - ds.getStart();

162

131

int dstream_ds = ds.getEnd() - s_end;

163

164

// build new flanked sequence

165

166

// compute gap padding to start of flanking sequence

167

131

int offset = maxoffset - ustream_ds;

168

169

// padding is gapChar x ( maxoffset - min(ustream_ds, flank)

170

131

if (flankSize >= 0)

171

{

172

125

if (flankSize < ustream_ds)

173

{

174

// take up to flankSize residues

175

offset = maxoffset - flankSize;

176

ustream_ds = flankSize;

177

}

178

125

if (flankSize <= dstream_ds)

179

{

180

116

dstream_ds = flankSize - 1;

181

}

182

}

183

// TODO use Character.toLowerCase to avoid creating String objects?

184

131

char[] upstream = new String(ds

185

.getSequence(s.getStart() - 1 - ustream_ds, s.getStart() - 1))

186

.toLowerCase().toCharArray();

187

131

char[] downstream = new String(

188

ds.getSequence(s_end - 1, s_end + dstream_ds)).toLowerCase()

189

.toCharArray();

190

131

char[] coreseq = s.getSequence();

191

131

char[] nseq = new char[offset + upstream.length + downstream.length

192

+ coreseq.length];

193

131

char c = core.getGapCharacter();

194

195

131

int p = 0;

196

461

for (; p < offset; p++)

197

{

198

330

nseq[p] = c;

199

}

200

201

131

System.arraycopy(upstream, 0, nseq, p, upstream.length);

202

131

System.arraycopy(coreseq, 0, nseq, p + upstream.length,

203

coreseq.length);

204

131

System.arraycopy(downstream, 0, nseq,

205

p + coreseq.length + upstream.length, downstream.length);

206

131

s.setSequence(new String(nseq));

207

131

s.setStart(s.getStart() - ustream_ds);

208

131

s.setEnd(s_end + downstream.length);

209

}

210

AlignmentI newAl = new jalview.datamodel.Alignment(

211

sq.toArray(new SequenceI[0]));

212

for (SequenceI s : sq)

213

{

214

131

if (s.getAnnotation() != null)

215

{

216

for (AlignmentAnnotation aa : s.getAnnotation())

217

{

218

aa.adjustForAlignment(); // JAL-1712 fix

219

newAl.addAnnotation(aa);

}

}

}

newAl.setDataset(core.getDataset());

return newAl;

}

/**

* Returns the index (zero-based position) of a sequence in an alignment, or

* -1 if not found.

* @param al

* @param seq

* @return

57660

public static int getSequenceIndex(AlignmentI al, SequenceI seq)

236

{

237

57660

int result = -1;

238

57660

int pos = 0;

239

57660

for (SequenceI alSeq : al.getSequences())

240

{

241

125897562

if (alSeq == seq)

242

{

243

57660

result = pos;

244

57660

break;

245

}

246

125839902

pos++;

247

}

248

57660

return result;

}

/**

* Returns a map of lists of sequences in the alignment, keyed by sequence

253

* name. For use in mapping between different alignment views of the same

254

* sequences.

255

256

* @see jalview.datamodel.AlignmentI#getSequencesByName()

257

258

public static Map<String, List<SequenceI>> getSequencesByName(

259

AlignmentI al)

260

{

261

Map<String, List<SequenceI>> theMap = new LinkedHashMap<>();

262

for (SequenceI seq : al.getSequences())

263

{

264

String name = seq.getName();

265

if (name != null)

266

{

267

List<SequenceI> seqs = theMap.get(name);

268

if (seqs == null)

269

{

270

seqs = new ArrayList<>();

271

theMap.put(name, seqs);

}

seqs.add(seq);

}

}

return theMap;

}

/**

* Build mapping of protein to cDNA alignment. Mappings are made between

281

* sequences where the cDNA translates to the protein sequence. Any new

282

* mappings are added to the protein alignment. Returns true if any mappings

283

* either already exist or were added, else false.

284

285

* @param proteinAlignment

286

* @param cdnaAlignment

287

* @return

288

289

public static boolean mapProteinAlignmentToCdna(

290

final AlignmentI proteinAlignment, final AlignmentI cdnaAlignment)

291

{

292

if (proteinAlignment == null || cdnaAlignment == null)

{

return false;

}

Set<SequenceI> mappedDna = new HashSet<>();

298

Set<SequenceI> mappedProtein = new HashSet<>();

299

300

301

* First pass - map sequences where cross-references exist. This include

302

* 1-to-many mappings to support, for example, variant cDNA.

303

304

boolean mappingPerformed = mapProteinToCdna(proteinAlignment,

305

cdnaAlignment, mappedDna, mappedProtein, true);

306

307

308

* Second pass - map sequences where no cross-references exist. This only

309

* does 1-to-1 mappings and assumes corresponding sequences are in the same

310

* order in the alignments.

311

312

mappingPerformed |= mapProteinToCdna(proteinAlignment, cdnaAlignment,

313

mappedDna, mappedProtein, false);

314

return mappingPerformed;

}

/**

* Make mappings between compatible sequences (where the cDNA translation

319

* matches the protein).

320

321

* @param proteinAlignment

322

* @param cdnaAlignment

323

* @param mappedDna

324

* a set of mapped DNA sequences (to add to)

325

* @param mappedProtein

326

* a set of mapped Protein sequences (to add to)

327

* @param xrefsOnly

328

* if true, only map sequences where xrefs exist

329

* @return

330

331

protected static boolean mapProteinToCdna(

332

final AlignmentI proteinAlignment, final AlignmentI cdnaAlignment,

333

Set<SequenceI> mappedDna, Set<SequenceI> mappedProtein,

334

boolean xrefsOnly)

335

{

336

boolean mappingExistsOrAdded = false;

337

List<SequenceI> thisSeqs = proteinAlignment.getSequences();

338

for (SequenceI aaSeq : thisSeqs)

339

{

340

boolean proteinMapped = false;

341

AlignedCodonFrame acf = new AlignedCodonFrame();

342

343

for (SequenceI cdnaSeq : cdnaAlignment.getSequences())

344

{

345

346

* Always try to map if sequences have xref to each other; this supports

347

* variant cDNA or alternative splicing for a protein sequence.

348

349

* If no xrefs, try to map progressively, assuming that alignments have

350

* mappable sequences in corresponding order. These are not

351

* many-to-many, as that would risk mixing species with similar cDNA

352

* sequences.

353

354

if (xrefsOnly && !AlignmentUtils.haveCrossRef(aaSeq, cdnaSeq))

{

continue;

}

* Don't map non-xrefd sequences more than once each. This heuristic

361

* allows us to pair up similar sequences in ordered alignments.

362

363

if (!xrefsOnly && (mappedProtein.contains(aaSeq)

364

|| mappedDna.contains(cdnaSeq)))

{

continue;

}

if (mappingExists(proteinAlignment.getCodonFrames(),

369

aaSeq.getDatasetSequence(), cdnaSeq.getDatasetSequence()))

370

{

371

mappingExistsOrAdded = true;

}

else

{

MapList map = mapCdnaToProtein(aaSeq, cdnaSeq);

376

if (map != null)

377

{

378

acf.addMap(cdnaSeq, aaSeq, map);

379

mappingExistsOrAdded = true;

380

proteinMapped = true;

381

mappedDna.add(cdnaSeq);

382

mappedProtein.add(aaSeq);

}

}

}

if (proteinMapped)

{

proteinAlignment.addCodonFrame(acf);

389

}

390

}

391

return mappingExistsOrAdded;

}

/**

* Answers true if the mappings include one between the given (dataset)

396

* sequences.

397

398

protected static boolean mappingExists(List<AlignedCodonFrame> mappings,

399

SequenceI aaSeq, SequenceI cdnaSeq)

400

{

401

if (mappings != null)

402

{

403

for (AlignedCodonFrame acf : mappings)

404

{

405

if (cdnaSeq == acf.getDnaForAaSeq(aaSeq))

{

return true;

}

}

}

return false;

}

/**

* Builds a mapping (if possible) of a cDNA to a protein sequence.

416

* <ul>

417

* <li>first checks if the cdna translates exactly to the protein

418

* sequence</li>

419

* <li>else checks for translation after removing a STOP codon</li>

420

* <li>else checks for translation after removing a START codon</li>

421

* <li>if that fails, inspect CDS features on the cDNA sequence</li>

422

* </ul>

423

* Returns null if no mapping is determined.

424

425

* @param proteinSeq

426

* the aligned protein sequence

427

* @param cdnaSeq

428

* the aligned cdna sequence

429

* @return

430

431

public static MapList mapCdnaToProtein(SequenceI proteinSeq,

SequenceI cdnaSeq)

{

* Here we handle either dataset sequence set (desktop) or absent (applet).

436

* Use only the char[] form of the sequence to avoid creating possibly large

437

* String objects.

438

439

final SequenceI proteinDataset = proteinSeq.getDatasetSequence();

440

char[] aaSeqChars = proteinDataset != null

441

? proteinDataset.getSequence()

442

: proteinSeq.getSequence();

443

final SequenceI cdnaDataset = cdnaSeq.getDatasetSequence();

444

char[] cdnaSeqChars = cdnaDataset != null ? cdnaDataset.getSequence()

445

: cdnaSeq.getSequence();

446

if (aaSeqChars == null || cdnaSeqChars == null)

{

return null;

}

* cdnaStart/End, proteinStartEnd are base 1 (for dataset sequence mapping)

453

454

final int mappedLength = CODON_LENGTH * aaSeqChars.length;

455

int cdnaLength = cdnaSeqChars.length;

456

int cdnaStart = cdnaSeq.getStart();

457

int cdnaEnd = cdnaSeq.getEnd();

458

final int proteinStart = proteinSeq.getStart();

459

final int proteinEnd = proteinSeq.getEnd();

460

461

462

* If lengths don't match, try ignoring stop codon (if present)

463

464

if (cdnaLength != mappedLength && cdnaLength > 2)

465

{

466

String lastCodon = String.valueOf(cdnaSeqChars,

467

cdnaLength - CODON_LENGTH, CODON_LENGTH).toUpperCase();

468

for (String stop : ResidueProperties.STOP_CODONS)

469

{

470

if (lastCodon.equals(stop))

471

{

472

cdnaEnd -= CODON_LENGTH;

473

cdnaLength -= CODON_LENGTH;

break;

}

}

}

* If lengths still don't match, try ignoring start codon.

481

482

int startOffset = 0;

483

if (cdnaLength != mappedLength && cdnaLength > 2

484

&& String.valueOf(cdnaSeqChars, 0, CODON_LENGTH).toUpperCase()

485

.equals(ResidueProperties.START))

486

{

487

startOffset += CODON_LENGTH;

488

cdnaStart += CODON_LENGTH;

489

cdnaLength -= CODON_LENGTH;

490

}

491

492

if (translatesAs(cdnaSeqChars, startOffset, aaSeqChars))

493

{

494

495

* protein is translation of dna (+/- start/stop codons)

496

497

MapList map = new MapList(new int[] { cdnaStart, cdnaEnd },

498

new int[]

499

{ proteinStart, proteinEnd }, CODON_LENGTH, 1);

return map;

}

* translation failed - try mapping CDS annotated regions of dna

505

506

return mapCdsToProtein(cdnaSeq, proteinSeq);

}

/**

* Test whether the given cdna sequence, starting at the given offset,

511

* translates to the given amino acid sequence, using the standard translation

512

* table. Designed to fail fast i.e. as soon as a mismatch position is found.

513

514

* @param cdnaSeqChars

* @param cdnaStart

* @param aaSeqChars

* @return

protected static boolean translatesAs(char[] cdnaSeqChars, int cdnaStart,

520

char[] aaSeqChars)

521

{

522

if (cdnaSeqChars == null || aaSeqChars == null)

{

return false;

}

int aaPos = 0;

int dnaPos = cdnaStart;

529

155

for (; dnaPos < cdnaSeqChars.length - 2

530

&& aaPos < aaSeqChars.length; dnaPos += CODON_LENGTH, aaPos++)

531

{

532

124

String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);

533

124

final String translated = ResidueProperties.codonTranslate(codon);

534

535

536

* allow * in protein to match untranslatable in dna

537

538

124

final char aaRes = aaSeqChars[aaPos];

539

124

if ((translated == null || ResidueProperties.STOP.equals(translated))

&& aaRes == '*')

{

continue;

}

120

if (translated == null || !(aaRes == translated.charAt(0)))

545

{

546

// debug

547

// System.out.println(("Mismatch at " + i + "/" + aaResidue + ": "

548

// + codon + "(" + translated + ") != " + aaRes));

return false;

}

}

* check we matched all of the protein sequence

555

556

if (aaPos != aaSeqChars.length)

{

return false;

}

* check we matched all of the dna except

563

* for optional trailing STOP codon

564

565

if (dnaPos == cdnaSeqChars.length)

{

return true;

}

if (dnaPos == cdnaSeqChars.length - CODON_LENGTH)

570

{

571

String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);

572

if (ResidueProperties.STOP

573

.equals(ResidueProperties.codonTranslate(codon)))

{

return true;

}

}

return false;

}

/**

* Align sequence 'seq' to match the alignment of a mapped sequence. Note this

583

* currently assumes that we are aligning cDNA to match protein.

584

585

* @param seq

586

* the sequence to be realigned

587

* @param al

588

* the alignment whose sequence alignment is to be 'copied'

589

* @param gap

590

* character string represent a gap in the realigned sequence

591

* @param preserveUnmappedGaps

592

* @param preserveMappedGaps

593

* @return true if the sequence was realigned, false if it could not be

594

595

public static boolean alignSequenceAs(SequenceI seq, AlignmentI al,

596

String gap, boolean preserveMappedGaps,

597

boolean preserveUnmappedGaps)

598

{

599

600

* Get any mappings from the source alignment to the target (dataset)

601

* sequence.

602

603

// TODO there may be one AlignedCodonFrame per dataset sequence, or one with

604

// all mappings. Would it help to constrain this?

605

List<AlignedCodonFrame> mappings = al.getCodonFrame(seq);

606

if (mappings == null || mappings.isEmpty())

{

return false;

}

* Locate the aligned source sequence whose dataset sequence is mapped. We

613

* just take the first match here (as we can't align like more than one

614

* sequence).

615

616

SequenceI alignFrom = null;

617

AlignedCodonFrame mapping = null;

618

for (AlignedCodonFrame mp : mappings)

619

{

620

alignFrom = mp.findAlignedSequence(seq, al);

621

if (alignFrom != null)

{

mapping = mp;

break;

}

}

if (alignFrom == null)

{

return false;

}

alignSequenceAs(seq, alignFrom, mapping, gap, al.getGapCharacter(),

633

preserveMappedGaps, preserveUnmappedGaps);

return true;

}

/**

* Align sequence 'alignTo' the same way as 'alignFrom', using the mapping to

639

* match residues and codons. Flags control whether existing gaps in unmapped

640

* (intron) and mapped (exon) regions are preserved or not. Gaps between

641

* intron and exon are only retained if both flags are set.

* @param alignTo

* @param alignFrom

* @param mapping

* @param myGap

* @param sourceGap

* @param preserveUnmappedGaps

649

* @param preserveMappedGaps

650

651

public static void alignSequenceAs(SequenceI alignTo, SequenceI alignFrom,

652

AlignedCodonFrame mapping, String myGap, char sourceGap,

653

boolean preserveMappedGaps, boolean preserveUnmappedGaps)

654

{

655

// TODO generalise to work for Protein-Protein, dna-dna, dna-protein

656

657

// aligned and dataset sequence positions, all base zero

int thisSeqPos = 0;

int sourceDsPos = 0;

int basesWritten = 0;

662

char myGapChar = myGap.charAt(0);

663

int ratio = myGap.length();

664

665

int fromOffset = alignFrom.getStart() - 1;

666

int toOffset = alignTo.getStart() - 1;

667

int sourceGapMappedLength = 0;

668

boolean inExon = false;

669

final int toLength = alignTo.getLength();

670

final int fromLength = alignFrom.getLength();

671

StringBuilder thisAligned = new StringBuilder(2 * toLength);

672

673

674

* Traverse the 'model' aligned sequence

675

676

205

for (int i = 0; i < fromLength; i++)

677

{

678

186

char sourceChar = alignFrom.getCharAt(i);

679

186

if (sourceChar == sourceGap)

680

{

681

sourceGapMappedLength += ratio;

continue;

}

* Found a non-gap character. Locate its mapped region if any.

687

688

142

sourceDsPos++;

689

// Note mapping positions are base 1, our sequence positions base 0

690

142

int[] mappedPos = mapping.getMappedRegion(alignTo, alignFrom,

691

sourceDsPos + fromOffset);

692

142

if (mappedPos == null)

693

{

694

695

* unmapped position; treat like a gap

696

697

sourceGapMappedLength += ratio;

698

// System.err.println("Can't align: no codon mapping to residue "

699

// + sourceDsPos + "(" + sourceChar + ")");

// return;

continue;

}

int mappedCodonStart = mappedPos[0]; // position (1...) of codon start

705

int mappedCodonEnd = mappedPos[mappedPos.length - 1]; // codon end pos

706

StringBuilder trailingCopiedGap = new StringBuilder();

707

708

709

* Copy dna sequence up to and including this codon. Optionally, include

710

* gaps before the codon starts (in introns) and/or after the codon starts

711

* (in exons).

712

713

* Note this only works for 'linear' splicing, not reverse or interleaved.

714

* But then 'align dna as protein' doesn't make much sense otherwise.

715

716

int intronLength = 0;

717

294

while (basesWritten + toOffset < mappedCodonEnd

718

&& thisSeqPos < toLength)

719

{

720

246

final char c = alignTo.getCharAt(thisSeqPos++);

721

246

if (c != myGapChar)

722

{

723

146

basesWritten++;

724

146

int sourcePosition = basesWritten + toOffset;

725

146

if (sourcePosition < mappedCodonStart)

726

{

727

728

* Found an unmapped (intron) base. First add in any preceding gaps

729

* (if wanted).

730

731

if (preserveUnmappedGaps && trailingCopiedGap.length() > 0)

732

{

733

thisAligned.append(trailingCopiedGap.toString());

734

intronLength += trailingCopiedGap.length();

735

trailingCopiedGap = new StringBuilder();

}

intronLength++;

inExon = false;

}

else

{

final boolean startOfCodon = sourcePosition == mappedCodonStart;

743

int gapsToAdd = calculateGapsToInsert(preserveMappedGaps,

744

preserveUnmappedGaps, sourceGapMappedLength, inExon,

745

trailingCopiedGap.length(), intronLength, startOfCodon);

746

215

for (int k = 0; k < gapsToAdd; k++)

747

{

748

117

thisAligned.append(myGapChar);

749

}

750

sourceGapMappedLength = 0;

751

inExon = true;

752

}

753

146

thisAligned.append(c);

754

146

trailingCopiedGap = new StringBuilder();

}

else

{

100

if (inExon && preserveMappedGaps)

759

{

760

trailingCopiedGap.append(myGapChar);

761

}

762

else if (!inExon && preserveUnmappedGaps)

763

{

764

trailingCopiedGap.append(myGapChar);

}

}

}

}

* At end of model aligned sequence. Copy any remaining target sequence, optionally

772

* including (intron) gaps.

773

774

129

while (thisSeqPos < toLength)

775

{

776

110

final char c = alignTo.getCharAt(thisSeqPos++);

777

110

if (c != myGapChar || preserveUnmappedGaps)

778

{

779

102

thisAligned.append(c);

780

}

781

110

sourceGapMappedLength--;

}

* finally add gaps to pad for any trailing source gaps or

786

* unmapped characters

787

788

if (preserveUnmappedGaps)

789

{

790

while (sourceGapMappedLength > 0)

791

{

792

thisAligned.append(myGapChar);

793

sourceGapMappedLength--;

}

}

* All done aligning, set the aligned sequence.

799

800

alignTo.setSequence(new String(thisAligned));

}

/**

* Helper method to work out how many gaps to insert when realigning.

805

806

* @param preserveMappedGaps

807

* @param preserveUnmappedGaps

808

* @param sourceGapMappedLength

809

* @param inExon

810

* @param trailingCopiedGap

811

* @param intronLength

812

* @param startOfCodon

813

* @return

814

815

protected static int calculateGapsToInsert(boolean preserveMappedGaps,

816

boolean preserveUnmappedGaps, int sourceGapMappedLength,

817

boolean inExon, int trailingGapLength, int intronLength,

818

final boolean startOfCodon)

{

int gapsToAdd = 0;

if (startOfCodon)

{

* Reached start of codon. Ignore trailing gaps in intron unless we are

825

* preserving gaps in both exon and intron. Ignore them anyway if the

826

* protein alignment introduces a gap at least as large as the intronic

827

* region.

828

829

if (inExon && !preserveMappedGaps)

830

{

831

trailingGapLength = 0;

832

}

833

if (!inExon && !(preserveMappedGaps && preserveUnmappedGaps))

834

{

835

trailingGapLength = 0;

}

if (inExon)

{

gapsToAdd = Math.max(sourceGapMappedLength, trailingGapLength);

}

else

{

if (intronLength + trailingGapLength <= sourceGapMappedLength)

844

{

845

gapsToAdd = sourceGapMappedLength - intronLength;

}

else

{

gapsToAdd = Math.min(

850

intronLength + trailingGapLength - sourceGapMappedLength,

trailingGapLength);

}

}

}

else

{

* second or third base of codon; check for any gaps in dna

859

860

if (!preserveMappedGaps)

861

{

862

trailingGapLength = 0;

863

}

864

gapsToAdd = Math.max(sourceGapMappedLength, trailingGapLength);

}

return gapsToAdd;

}

/**

* Realigns the given protein to match the alignment of the dna, using codon

871

* mappings to translate aligned codon positions to protein residues.

872

873

* @param protein

874

* the alignment whose sequences are realigned by this method

875

* @param dna

876

* the dna alignment whose alignment we are 'copying'

877

* @return the number of sequences that were realigned

878

879

public static int alignProteinAsDna(AlignmentI protein, AlignmentI dna)

880

{

881

if (protein.isNucleotide() || !dna.isNucleotide())

882

{

883

System.err.println("Wrong alignment type in alignProteinAsDna");

884

return 0;

885

}

886

List<SequenceI> unmappedProtein = new ArrayList<>();

887

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons = buildCodonColumnsMap(

888

protein, dna, unmappedProtein);

889

return alignProteinAs(protein, alignedCodons, unmappedProtein);

}

/**

* Realigns the given dna to match the alignment of the protein, using codon

894

* mappings to translate aligned peptide positions to codons.

895

896

* Always produces a padded CDS alignment.

897

898

* @param dna

899

* the alignment whose sequences are realigned by this method

900

* @param protein

901

* the protein alignment whose alignment we are 'copying'

902

* @return the number of sequences that were realigned

903

904

public static int alignCdsAsProtein(AlignmentI dna, AlignmentI protein)

905

{

906

if (protein.isNucleotide() || !dna.isNucleotide())

907

{

908

System.err.println("Wrong alignment type in alignProteinAsDna");

909

return 0;

910

}

911

// todo: implement this

912

List<AlignedCodonFrame> mappings = protein.getCodonFrames();

913

int alignedCount = 0;

914

int width = 0; // alignment width for padding CDS

915

for (SequenceI dnaSeq : dna.getSequences())

916

{

917

if (alignCdsSequenceAsProtein(dnaSeq, protein, mappings,

918

dna.getGapCharacter()))

{

alignedCount++;

}

width = Math.max(dnaSeq.getLength(), width);

}

int oldwidth;

int diff;

for (SequenceI dnaSeq : dna.getSequences())

927

{

928

oldwidth = dnaSeq.getLength();

929

diff = width - oldwidth;

930

if (diff > 0)

931

{

932

dnaSeq.insertCharAt(oldwidth, diff, dna.getGapCharacter());

}

}

return alignedCount;

}

/**

* Helper method to align (if possible) the dna sequence to match the

940

* alignment of a mapped protein sequence. This is currently limited to

941

* handling coding sequence only.

* @param cdsSeq

* @param protein

* @param mappings

* @param gapChar

* @return

static boolean alignCdsSequenceAsProtein(SequenceI cdsSeq,

950

AlignmentI protein, List<AlignedCodonFrame> mappings,

951

char gapChar)

952

{

953

SequenceI cdsDss = cdsSeq.getDatasetSequence();

if (cdsDss == null)

{

System.err

.println("alignCdsSequenceAsProtein needs aligned sequence!");

return false;

}

List<AlignedCodonFrame> dnaMappings = MappingUtils

962

.findMappingsForSequence(cdsSeq, mappings);

963

for (AlignedCodonFrame mapping : dnaMappings)

964

{

965

SequenceI peptide = mapping.findAlignedSequence(cdsSeq, protein);

966

if (peptide != null)

967

{

968

final int peptideLength = peptide.getLength();

969

Mapping map = mapping.getMappingBetween(cdsSeq, peptide);

970

if (map != null)

971

{

972

MapList mapList = map.getMap();

973

if (map.getTo() == peptide.getDatasetSequence())

974

{

975

mapList = mapList.getInverse();

976

}

977

final int cdsLength = cdsDss.getLength();

978

int mappedFromLength = MappingUtils.getLength(mapList

979

.getFromRanges());

980

int mappedToLength = MappingUtils

981

.getLength(mapList.getToRanges());

982

boolean addStopCodon = (cdsLength == mappedFromLength

983

* CODON_LENGTH + CODON_LENGTH)

984

|| (peptide.getDatasetSequence()

985

.getLength() == mappedFromLength - 1);

986

if (cdsLength != mappedToLength && !addStopCodon)

987

{

988

System.err.println(String.format(

989

"Can't align cds as protein (length mismatch %d/%d): %s",

990

cdsLength, mappedToLength, cdsSeq.getName()));

}

* pre-fill the aligned cds sequence with gaps

995

996

char[] alignedCds = new char[peptideLength * CODON_LENGTH

997

+ (addStopCodon ? CODON_LENGTH : 0)];

998

Arrays.fill(alignedCds, gapChar);

999

1000

1001

* walk over the aligned peptide sequence and insert mapped

1002

* codons for residues in the aligned cds sequence

1003

1004

int copiedBases = 0;

1005

int cdsStart = cdsDss.getStart();

1006

int proteinPos = peptide.getStart() - 1;

1007

int cdsCol = 0;

1008

1009

for (int col = 0; col < peptideLength; col++)

1010

{

1011

char residue = peptide.getCharAt(col);

1012

1013

if (Comparison.isGap(residue))

1014

{

1015

cdsCol += CODON_LENGTH;

}

else

{

proteinPos++;

int[] codon = mapList.locateInTo(proteinPos, proteinPos);

1021

if (codon == null)

1022

{

1023

// e.g. incomplete start codon, X in peptide

1024

cdsCol += CODON_LENGTH;

}

else

{

for (int j = codon[0]; j <= codon[1]; j++)

1029

{

1030

char mappedBase = cdsDss.getCharAt(j - cdsStart);

1031

alignedCds[cdsCol++] = mappedBase;

copiedBases++;

}

}

}

}

* append stop codon if not mapped from protein,

1040

* closing it up to the end of the mapped sequence

1041

1042

if (copiedBases == cdsLength - CODON_LENGTH)

1043

{

1044

for (int i = alignedCds.length - 1; i >= 0; i--)

1045

{

1046

if (!Comparison.isGap(alignedCds[i]))

1047

{

1048

cdsCol = i + 1; // gap just after end of sequence

break;

}

}

for (int i = cdsLength - CODON_LENGTH; i < cdsLength; i++)

1053

{

1054

alignedCds[cdsCol++] = cdsDss.getCharAt(i);

1055

}

1056

}

1057

cdsSeq.setSequence(new String(alignedCds));

return true;

}

}

}

return false;

}

/**

* Builds a map whose key is an aligned codon position (3 alignment column

1067

* numbers base 0), and whose value is a map from protein sequence to each

1068

* protein's peptide residue for that codon. The map generates an ordering of

1069

* the codons, and allows us to read off the peptides at each position in

1070

* order to assemble 'aligned' protein sequences.

1071

1072

* @param protein

1073

* the protein alignment

1074

* @param dna

1075

* the coding dna alignment

1076

* @param unmappedProtein

1077

* any unmapped proteins are added to this list

1078

* @return

1079

1080

protected static Map<AlignedCodon, Map<SequenceI, AlignedCodon>> buildCodonColumnsMap(

1081

AlignmentI protein, AlignmentI dna,

1082

List<SequenceI> unmappedProtein)

1083

{

1084

1085

* maintain a list of any proteins with no mappings - these will be

1086

* rendered 'as is' in the protein alignment as we can't align them

1087

1088

unmappedProtein.addAll(protein.getSequences());

1089

1090

List<AlignedCodonFrame> mappings = protein.getCodonFrames();

1091

1092

1093

* Map will hold, for each aligned codon position e.g. [3, 5, 6], a map of

1094

* {dnaSequence, {proteinSequence, codonProduct}} at that position. The

1095

* comparator keeps the codon positions ordered.

1096

1097

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons = new TreeMap<>(

1098

new CodonComparator());

1099

1100

for (SequenceI dnaSeq : dna.getSequences())

1101

{

1102

for (AlignedCodonFrame mapping : mappings)

1103

{

1104

SequenceI prot = mapping.findAlignedSequence(dnaSeq, protein);

1105

if (prot != null)

1106

{

1107

Mapping seqMap = mapping.getMappingForSequence(dnaSeq);

1108

addCodonPositions(dnaSeq, prot, protein.getGapCharacter(), seqMap,

1109

alignedCodons);

1110

unmappedProtein.remove(prot);

}

}

}

* Finally add any unmapped peptide start residues (e.g. for incomplete

1117

* codons) as if at the codon position before the second residue

1118

1119

// TODO resolve JAL-2022 so this fudge can be removed

1120

int mappedSequenceCount = protein.getHeight() - unmappedProtein.size();

1121

addUnmappedPeptideStarts(alignedCodons, mappedSequenceCount);

1122

1123

return alignedCodons;

}

/**

* Scans for any protein mapped from position 2 (meaning unmapped start

1128

* position e.g. an incomplete codon), and synthesizes a 'codon' for it at the

1129

* preceding position in the alignment

1130

1131

* @param alignedCodons

1132

* the codon-to-peptide map

1133

* @param mappedSequenceCount

1134

* the number of distinct sequences in the map

1135

1136

protected static void addUnmappedPeptideStarts(

1137

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,

1138

int mappedSequenceCount)

1139

{

1140

// TODO delete this ugly hack once JAL-2022 is resolved

1141

// i.e. we can model startPhase > 0 (incomplete start codon)

1142

1143

List<SequenceI> sequencesChecked = new ArrayList<>();

1144

AlignedCodon lastCodon = null;

1145

Map<SequenceI, AlignedCodon> toAdd = new HashMap<>();

1146

1147

for (Entry<AlignedCodon, Map<SequenceI, AlignedCodon>> entry : alignedCodons

1148

.entrySet())

1149

{

1150

for (Entry<SequenceI, AlignedCodon> sequenceCodon : entry.getValue()

1151

.entrySet())

1152

{

1153

SequenceI seq = sequenceCodon.getKey();

1154

if (sequencesChecked.contains(seq))

{

continue;

}

sequencesChecked.add(seq);

1159

AlignedCodon codon = sequenceCodon.getValue();

1160

if (codon.peptideCol > 1)

1161

{

1162

System.err.println(

1163

"Problem mapping protein with >1 unmapped start positions: "

1164

+ seq.getName());

1165

}

1166

else if (codon.peptideCol == 1)

1167

{

1168

1169

* first position (peptideCol == 0) was unmapped - add it

1170

1171

if (lastCodon != null)

1172

{

1173

AlignedCodon firstPeptide = new AlignedCodon(lastCodon.pos1,

1174

lastCodon.pos2, lastCodon.pos3,

1175

String.valueOf(seq.getCharAt(0)), 0);

1176

toAdd.put(seq, firstPeptide);

}

else

{

* unmapped residue at start of alignment (no prior column) -

1182

* 'insert' at nominal codon [0, 0, 0]

1183

1184

AlignedCodon firstPeptide = new AlignedCodon(0, 0, 0,

1185

String.valueOf(seq.getCharAt(0)), 0);

1186

toAdd.put(seq, firstPeptide);

1187

}

1188

}

1189

if (sequencesChecked.size() == mappedSequenceCount)

1190

{

1191

// no need to check past first mapped position in all sequences

break;

}

}

lastCodon = entry.getKey();

}

* add any new codons safely after iterating over the map

1200

1201

for (Entry<SequenceI, AlignedCodon> startCodon : toAdd.entrySet())

1202

{

1203

addCodonToMap(alignedCodons, startCodon.getValue(),

1204

startCodon.getKey());

}

}

/**

* Update the aligned protein sequences to match the codon alignments given in

* the map.

* @param protein

* @param alignedCodons

1214

* an ordered map of codon positions (columns), with sequence/peptide

1215

* values present in each column

1216

* @param unmappedProtein

1217

* @return

1218

1219

protected static int alignProteinAs(AlignmentI protein,

1220

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,

1221

List<SequenceI> unmappedProtein)

1222

{

1223

1224

* prefill peptide sequences with gaps

1225

1226

int alignedWidth = alignedCodons.size();

1227

char[] gaps = new char[alignedWidth];

1228

Arrays.fill(gaps, protein.getGapCharacter());

1229

Map<SequenceI, char[]> peptides = new HashMap<>();

1230

for (SequenceI seq : protein.getSequences())

1231

{

1232

if (!unmappedProtein.contains(seq))

1233

{

1234

peptides.put(seq, Arrays.copyOf(gaps, gaps.length));

}

}

* Traverse the codons left to right (as defined by CodonComparator)

1240

* and insert peptides in each column where the sequence is mapped.

1241

* This gives a peptide 'alignment' where residues are aligned if their

1242

* corresponding codons occupy the same columns in the cdna alignment.

1243

1244

int column = 0;

1245

for (AlignedCodon codon : alignedCodons.keySet())

1246

{

1247

final Map<SequenceI, AlignedCodon> columnResidues = alignedCodons

1248

.get(codon);

1249

for (Entry<SequenceI, AlignedCodon> entry : columnResidues.entrySet())

1250

{

1251

char residue = entry.getValue().product.charAt(0);

1252

peptides.get(entry.getKey())[column] = residue;

}

column++;

}

* and finally set the constructed sequences

1259

1260

for (Entry<SequenceI, char[]> entry : peptides.entrySet())

1261

{

1262

entry.getKey().setSequence(new String(entry.getValue()));

}

return 0;

}

/**

* Populate the map of aligned codons by traversing the given sequence

1270

* mapping, locating the aligned positions of mapped codons, and adding those

1271

* positions and their translation products to the map.

1272

1273

* @param dna

1274

* the aligned sequence we are mapping from

1275

* @param protein

1276

* the sequence to be aligned to the codons

1277

* @param gapChar

1278

* the gap character in the dna sequence

1279

* @param seqMap

1280

* a mapping to a sequence translation

1281

* @param alignedCodons

1282

* the map we are building up

1283

1284

static void addCodonPositions(SequenceI dna, SequenceI protein,

1285

char gapChar, Mapping seqMap,

1286

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons)

1287

{

1288

Iterator<AlignedCodon> codons = seqMap.getCodonIterator(dna, gapChar);

1289

1290

1291

* add codon positions, and their peptide translations, to the alignment

1292

* map, while remembering the first codon mapped

1293

1294

while (codons.hasNext())

{

try

{

AlignedCodon codon = codons.next();

1299

addCodonToMap(alignedCodons, codon, protein);

1300

} catch (IncompleteCodonException e)

1301

{

1302

// possible incomplete trailing codon - ignore

1303

} catch (NoSuchElementException e)

1304

{

1305

// possibly peptide lacking STOP

}

}

}

/**

* Helper method to add a codon-to-peptide entry to the aligned codons map

1312

1313

* @param alignedCodons

* @param codon

* @param protein

protected static void addCodonToMap(

1318

Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,

1319

AlignedCodon codon, SequenceI protein)

1320

{

1321

Map<SequenceI, AlignedCodon> seqProduct = alignedCodons.get(codon);

1322

if (seqProduct == null)

1323

{

1324

seqProduct = new HashMap<>();

1325

alignedCodons.put(codon, seqProduct);

1326

}

1327

seqProduct.put(protein, codon);

}

/**

* Returns true if a cDNA/Protein mapping either exists, or could be made,

1332

* between at least one pair of sequences in the two alignments. Currently,

1333

* the logic is:

1334

* <ul>

1335

* <li>One alignment must be nucleotide, and the other protein</li>

1336

* <li>At least one pair of sequences must be already mapped, or mappable</li>

1337

* <li>Mappable means the nucleotide translation matches the protein

1338

* sequence</li>

1339

* <li>The translation may ignore start and stop codons if present in the

* nucleotide</li>

* </ul>

* @param al1

* @param al2

* @return

public static boolean isMappable(AlignmentI al1, AlignmentI al2)

1348

{

1349

if (al1 == null || al2 == null)

{

return false;

}

* Require one nucleotide and one protein

1356

1357

if (al1.isNucleotide() == al2.isNucleotide())

{

return false;

}

AlignmentI dna = al1.isNucleotide() ? al1 : al2;

1362

AlignmentI protein = dna == al1 ? al2 : al1;

1363

List<AlignedCodonFrame> mappings = protein.getCodonFrames();

1364

for (SequenceI dnaSeq : dna.getSequences())

1365

{

1366

for (SequenceI proteinSeq : protein.getSequences())

1367

{

1368

if (isMappable(dnaSeq, proteinSeq, mappings))

{

return true;

}

}

}

return false;

}

/**

* Returns true if the dna sequence is mapped, or could be mapped, to the

* protein sequence.

* @param dnaSeq

* @param proteinSeq

* @param mappings

* @return

protected static boolean isMappable(SequenceI dnaSeq,

1387

SequenceI proteinSeq, List<AlignedCodonFrame> mappings)

1388

{

1389

if (dnaSeq == null || proteinSeq == null)

{

return false;

}

SequenceI dnaDs = dnaSeq.getDatasetSequence() == null ? dnaSeq

1395

: dnaSeq.getDatasetSequence();

1396

SequenceI proteinDs = proteinSeq.getDatasetSequence() == null

1397

? proteinSeq

1398

: proteinSeq.getDatasetSequence();

1399

1400

for (AlignedCodonFrame mapping : mappings)

1401

{

1402

if (proteinDs == mapping.getAaForDnaSeq(dnaDs))

{

* already mapped

return true;

}

}

* Just try to make a mapping (it is not yet stored), test whether

1413

* successful.

1414

1415

return mapCdnaToProtein(proteinDs, dnaDs) != null;

}

/**

* Finds any reference annotations associated with the sequences in

1420

* sequenceScope, that are not already added to the alignment, and adds them

1421

* to the 'candidates' map. Also populates a lookup table of annotation

1422

* labels, keyed by calcId, for use in constructing tooltips or the like.

1423

1424

* @param sequenceScope

1425

* the sequences to scan for reference annotations

1426

* @param labelForCalcId

1427

* (optional) map to populate with label for calcId

1428

* @param candidates

1429

* map to populate with annotations for sequence

1430

* @param al

1431

* the alignment to check for presence of annotations

1432

1433

public static void findAddableReferenceAnnotations(

1434

List<SequenceI> sequenceScope, Map<String, String> labelForCalcId,

1435

final Map<SequenceI, List<AlignmentAnnotation>> candidates,

1436

AlignmentI al)

1437

{

1438

if (sequenceScope == null)

{

return;

}

* For each sequence in scope, make a list of any annotations on the

1445

* underlying dataset sequence which are not already on the alignment.

1446

1447

* Add to a map of { alignmentSequence, <List of annotations to add> }

1448

1449

for (SequenceI seq : sequenceScope)

1450

{

1451

SequenceI dataset = seq.getDatasetSequence();

if (dataset == null)

{

continue;

}

AlignmentAnnotation[] datasetAnnotations = dataset.getAnnotation();

1457

if (datasetAnnotations == null)

{

continue;

}

final List<AlignmentAnnotation> result = new ArrayList<>();

1462

for (AlignmentAnnotation dsann : datasetAnnotations)

1463

{

1464

1465

* Find matching annotations on the alignment. If none is found, then

1466

* add this annotation to the list of 'addable' annotations for this

1467

* sequence.

1468

1469

final Iterable<AlignmentAnnotation> matchedAlignmentAnnotations = al

1470

.findAnnotations(seq, dsann.getCalcId(), dsann.label);

1471

if (!matchedAlignmentAnnotations.iterator().hasNext())

1472

{

1473

result.add(dsann);

1474

if (labelForCalcId != null)

1475

{

1476

labelForCalcId.put(dsann.getCalcId(), dsann.label);

}

}

}

* Save any addable annotations for this sequence

1482

1483

if (!result.isEmpty())

1484

{

1485

candidates.put(seq, result);

}

}

}

/**

* Adds annotations to the top of the alignment annotations, in the same order

1492

* as their related sequences.

1493

1494

* @param annotations

1495

* the annotations to add

1496

* @param alignment

1497

* the alignment to add them to

1498

* @param selectionGroup

1499

* current selection group (or null if none)

1500

1501

public static void addReferenceAnnotations(

1502

Map<SequenceI, List<AlignmentAnnotation>> annotations,

1503

final AlignmentI alignment, final SequenceGroup selectionGroup)

1504

{

1505

for (SequenceI seq : annotations.keySet())

1506

{

1507

for (AlignmentAnnotation ann : annotations.get(seq))

1508

{

1509

AlignmentAnnotation copyAnn = new AlignmentAnnotation(ann);

1510

int startRes = 0;

1511

int endRes = ann.annotations.length;

1512

if (selectionGroup != null)

1513

{

1514

startRes = selectionGroup.getStartRes();

1515

endRes = selectionGroup.getEndRes();

1516

}

1517

copyAnn.restrict(startRes, endRes);

1518

1519

1520

* Add to the sequence (sets copyAnn.datasetSequence), unless the

1521

* original annotation is already on the sequence.

1522

1523

if (!seq.hasAnnotation(ann))

1524

{

1525

seq.addAlignmentAnnotation(copyAnn);

1526

}

1527

// adjust for gaps

1528

copyAnn.adjustForAlignment();

1529

// add to the alignment and set visible

1530

alignment.addAnnotation(copyAnn);

1531

copyAnn.visible = true;

}

}

}

/**

* Set visibility of alignment annotations of specified types (labels), for

1538

* specified sequences. This supports controls like "Show all secondary

1539

* structure", "Hide all Temp factor", etc.

1540

1541

* @al the alignment to scan for annotations

1542

* @param types

1543

* the types (labels) of annotations to be updated

1544

* @param forSequences

1545

* if not null, only annotations linked to one of these sequences are

1546

* in scope for update; if null, acts on all sequence annotations

1547

* @param anyType

1548

* if this flag is true, 'types' is ignored (label not checked)

1549

* @param doShow

1550

* if true, set visibility on, else set off

1551

1552

public static void showOrHideSequenceAnnotations(AlignmentI al,

1553

Collection<String> types, List<SequenceI> forSequences,

1554

boolean anyType, boolean doShow)

1555

{

1556

AlignmentAnnotation[] anns = al.getAlignmentAnnotation();

1557

if (anns != null)

1558

{

1559

for (AlignmentAnnotation aa : anns)

1560

{

1561

if (anyType || types.contains(aa.label))

1562

{

1563

if ((aa.sequenceRef != null) && (forSequences == null

1564

|| forSequences.contains(aa.sequenceRef)))

{

aa.visible = doShow;

}

}

}

}

}

/**

* Returns true if either sequence has a cross-reference to the other

* @param seq1

* @param seq2

* @return

public static boolean haveCrossRef(SequenceI seq1, SequenceI seq2)

1581

{

1582

// Note: moved here from class CrossRef as the latter class has dependencies

1583

// not availability to the applet's classpath

1584

return hasCrossRef(seq1, seq2) || hasCrossRef(seq2, seq1);

}

/**

* Returns true if seq1 has a cross-reference to seq2. Currently this assumes

1589

* that sequence name is structured as Source|AccessionId.

* @param seq1

* @param seq2

* @return

108

public static boolean hasCrossRef(SequenceI seq1, SequenceI seq2)

1596

{

1597

108

if (seq1 == null || seq2 == null)

{

return false;

}

100

String name = seq2.getName();

1602

100

final DBRefEntry[] xrefs = seq1.getDBRefs();

1603

100

if (xrefs != null)

1604

{

1605

for (DBRefEntry xref : xrefs)

1606

{

1607

String xrefName = xref.getSource() + "|" + xref.getAccessionId();

1608

// case-insensitive test, consistent with DBRefEntry.equalRef()

1609

if (xrefName.equalsIgnoreCase(name))

{

return true;

}

}

}

return false;

}

/**

* Constructs an alignment consisting of the mapped (CDS) regions in the given

1620

* nucleotide sequences, and updates mappings to match. The CDS sequences are

1621

* added to the original alignment's dataset, which is shared by the new

1622

* alignment. Mappings from nucleotide to CDS, and from CDS to protein, are

1623

* added to the alignment dataset.

1624

1625

* @param dna

1626

* aligned nucleotide (dna or cds) sequences

1627

* @param dataset

1628

* the alignment dataset the sequences belong to

1629

* @param products

1630

* (optional) to restrict results to CDS that map to specified

1631

* protein products

1632

* @return an alignment whose sequences are the cds-only parts of the dna

1633

* sequences (or null if no mappings are found)

1634

1635

public static AlignmentI makeCdsAlignment(SequenceI[] dna,

1636

AlignmentI dataset, SequenceI[] products)

1637

{

1638

if (dataset == null || dataset.getDataset() != null)

1639

{

1640

throw new IllegalArgumentException(

1641

"IMPLEMENTATION ERROR: dataset.getDataset() must be null!");

1642

}

1643

List<SequenceI> foundSeqs = new ArrayList<>();

1644

List<SequenceI> cdsSeqs = new ArrayList<>();

1645

List<AlignedCodonFrame> mappings = dataset.getCodonFrames();

1646

HashSet<SequenceI> productSeqs = null;

1647

if (products != null)

1648

{

1649

productSeqs = new HashSet<>();

1650

for (SequenceI seq : products)

1651

{

1652

productSeqs.add(seq.getDatasetSequence() == null ? seq : seq

1653

.getDatasetSequence());

}

}

* Construct CDS sequences from mappings on the alignment dataset.

1659

* The logic is:

1660

* - find the protein product(s) mapped to from each dna sequence

1661

* - if the mapping covers the whole dna sequence (give or take start/stop

1662

* codon), take the dna as the CDS sequence

1663

* - else search dataset mappings for a suitable dna sequence, i.e. one

1664

* whose whole sequence is mapped to the protein

1665

* - if no sequence found, construct one from the dna sequence and mapping

1666

* (and add it to dataset so it is found if this is repeated)

1667

1668

for (SequenceI dnaSeq : dna)

1669

{

1670

SequenceI dnaDss = dnaSeq.getDatasetSequence() == null ? dnaSeq

1671

: dnaSeq.getDatasetSequence();

1672

1673

List<AlignedCodonFrame> seqMappings = MappingUtils

1674

.findMappingsForSequence(dnaSeq, mappings);

1675

for (AlignedCodonFrame mapping : seqMappings)

1676

{

1677

List<Mapping> mappingsFromSequence = mapping

1678

.getMappingsFromSequence(dnaSeq);

1679

1680

for (Mapping aMapping : mappingsFromSequence)

1681

{

1682

MapList mapList = aMapping.getMap();

1683

if (mapList.getFromRatio() == 1)

1684

{

1685

1686

* not a dna-to-protein mapping (likely dna-to-cds)

continue;

}

* skip if mapping is not to one of the target set of proteins

1693

1694

SequenceI proteinProduct = aMapping.getTo();

1695

if (productSeqs != null && !productSeqs.contains(proteinProduct))

{

continue;

}

* try to locate the CDS from the dataset mappings;

1702

* guard against duplicate results (for the case that protein has

1703

* dbrefs to both dna and cds sequences)

1704

1705

SequenceI cdsSeq = findCdsForProtein(mappings, dnaSeq,

1706

seqMappings, aMapping);

1707

if (cdsSeq != null)

1708

{

1709

if (!foundSeqs.contains(cdsSeq))

1710

{

1711

foundSeqs.add(cdsSeq);

1712

SequenceI derivedSequence = cdsSeq.deriveSequence();

1713

cdsSeqs.add(derivedSequence);

1714

if (!dataset.getSequences().contains(cdsSeq))

1715

{

1716

dataset.addSequence(cdsSeq);

}

}

continue;

}

* didn't find mapped CDS sequence - construct it and add

1724

* its dataset sequence to the dataset

1725

1726

cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping,

1727

dataset).deriveSequence();

1728

// cdsSeq has a name constructed as CDS|<dbref>

1729

// <dbref> will be either the accession for the coding sequence,

1730

// marked in the /via/ dbref to the protein product accession

1731

// or it will be the original nucleotide accession.

1732

SequenceI cdsSeqDss = cdsSeq.getDatasetSequence();

cdsSeqs.add(cdsSeq);

if (!dataset.getSequences().contains(cdsSeqDss))

1737

{

1738

// check if this sequence is a newly created one

1739

// so needs adding to the dataset

1740

dataset.addSequence(cdsSeqDss);

}

* add a mapping from CDS to the (unchanged) mapped to range

1745

1746

List<int[]> cdsRange = Collections.singletonList(new int[] { 1,

1747

cdsSeq.getLength() });

1748

MapList cdsToProteinMap = new MapList(cdsRange,

1749

mapList.getToRanges(), mapList.getFromRatio(),

1750

mapList.getToRatio());

1751

AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();

1752

cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,

cdsToProteinMap);

* guard against duplicating the mapping if repeating this action

1757

1758

if (!mappings.contains(cdsToProteinMapping))

1759

{

1760

mappings.add(cdsToProteinMapping);

1761

}

1762

1763

propagateDBRefsToCDS(cdsSeqDss, dnaSeq.getDatasetSequence(),

1764

proteinProduct, aMapping);

1765

1766

* add another mapping from original 'from' range to CDS

1767

1768

AlignedCodonFrame dnaToCdsMapping = new AlignedCodonFrame();

1769

final MapList dnaToCdsMap = new MapList(mapList.getFromRanges(),

1770

cdsRange, 1, 1);

1771

dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeqDss,

1772

dnaToCdsMap);

1773

if (!mappings.contains(dnaToCdsMapping))

1774

{

1775

mappings.add(dnaToCdsMapping);

}

* transfer dna chromosomal loci (if known) to the CDS

1780

* sequence (via the mapping)

1781

1782

final MapList cdsToDnaMap = dnaToCdsMap.getInverse();

1783

transferGeneLoci(dnaSeq, cdsToDnaMap, cdsSeq);

1784

1785

1786

* add DBRef with mapping from protein to CDS

1787

* (this enables Get Cross-References from protein alignment)

1788

* This is tricky because we can't have two DBRefs with the

1789

* same source and accession, so need a different accession for

1790

* the CDS from the dna sequence

1791

1792

1793

// specific use case:

1794

// Genomic contig ENSCHR:1, contains coding regions for ENSG01,

1795

// ENSG02, ENSG03, with transcripts and products similarly named.

1796

// cannot add distinct dbrefs mapping location on ENSCHR:1 to ENSG01

1797

1798

// JBPNote: ?? can't actually create an example that demonstrates we

1799

// need to

1800

// synthesize an xref.

1801

1802

for (DBRefEntry primRef : dnaDss.getPrimaryDBRefs())

1803

{

1804

1805

* create a cross-reference from CDS to the source sequence's

1806

* primary reference and vice versa

1807

1808

String source = primRef.getSource();

1809

String version = primRef.getVersion();

1810

DBRefEntry cdsCrossRef = new DBRefEntry(source, source + ":"

1811

+ version, primRef.getAccessionId());

1812

cdsCrossRef.setMap(new Mapping(dnaDss, new MapList(cdsToDnaMap)));

1813

cdsSeqDss.addDBRef(cdsCrossRef);

1814

1815

dnaSeq.addDBRef(new DBRefEntry(source, version, cdsSeq

1816

.getName(), new Mapping(cdsSeqDss, dnaToCdsMap)));

1817

1818

// problem here is that the cross-reference is synthesized -

1819

// cdsSeq.getName() may be like 'CDS|dnaaccession' or

1820

// 'CDS|emblcdsacc'

1821

// assuming cds version same as dna ?!?

1822

1823

DBRefEntry proteinToCdsRef = new DBRefEntry(source, version,

1824

cdsSeq.getName());

1825

1826

proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap

1827

.getInverse()));

1828

proteinProduct.addDBRef(proteinToCdsRef);

}

* transfer any features on dna that overlap the CDS

1833

1834

transferFeatures(dnaSeq, cdsSeq, dnaToCdsMap, null,

1835

SequenceOntologyI.CDS);

}

}

}

AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs

1841

.size()]));

1842

cds.setDataset(dataset);

return cds;

}

/**

* Tries to transfer gene loci (dbref to chromosome positions) from fromSeq to

1849

* toSeq, mediated by the given mapping between the sequences

1850

1851

* @param fromSeq

1852

* @param targetToFrom

* Map

* @param targetSeq

protected static void transferGeneLoci(SequenceI fromSeq,

1857

MapList targetToFrom, SequenceI targetSeq)

1858

{

1859

if (targetSeq.getGeneLoci() != null)

1860

{

1861

// already have - don't override

1862

return;

1863

}

1864

GeneLociI fromLoci = fromSeq.getGeneLoci();

1865

if (fromLoci == null)

{

return;

}

MapList newMap = targetToFrom.traverse(fromLoci.getMap());

if (newMap != null)

{

targetSeq.setGeneLoci(fromLoci.getSpeciesId(),

1875

fromLoci.getAssemblyId(), fromLoci.getChromosomeId(), newMap);

}

}

/**

* A helper method that finds a CDS sequence in the alignment dataset that is

1881

* mapped to the given protein sequence, and either is, or has a mapping from,

1882

* the given dna sequence.

1883

1884

* @param mappings

1885

* set of all mappings on the dataset

1886

* @param dnaSeq

1887

* a dna (or cds) sequence we are searching from

1888

* @param seqMappings

1889

* the set of mappings involving dnaSeq

1890

* @param aMapping

1891

* a transcript-to-peptide mapping

1892

* @return

1893

1894

static SequenceI findCdsForProtein(List<AlignedCodonFrame> mappings,

1895

SequenceI dnaSeq, List<AlignedCodonFrame> seqMappings,

Mapping aMapping)

{

* TODO a better dna-cds-protein mapping data representation to allow easy

1900

* navigation; until then this clunky looping around lists of mappings

1901

1902

SequenceI seqDss = dnaSeq.getDatasetSequence() == null ? dnaSeq

1903

: dnaSeq.getDatasetSequence();

1904

SequenceI proteinProduct = aMapping.getTo();

1905

1906

1907

* is this mapping from the whole dna sequence (i.e. CDS)?

1908

* allowing for possible stop codon on dna but not peptide

1909

1910

int mappedFromLength = MappingUtils

1911

.getLength(aMapping.getMap().getFromRanges());

1912

int dnaLength = seqDss.getLength();

1913

if (mappedFromLength == dnaLength

1914

|| mappedFromLength == dnaLength - CODON_LENGTH)

1915

{

1916

1917

* if sequence has CDS features, this is a transcript with no UTR

1918

* - do not take this as the CDS sequence! (JAL-2789)

1919

1920

if (seqDss.getFeatures().getFeaturesByOntology(SequenceOntologyI.CDS)

.isEmpty())

{

return seqDss;

}

}

* looks like we found the dna-to-protein mapping; search for the

1929

* corresponding cds-to-protein mapping

1930

1931

List<AlignedCodonFrame> mappingsToPeptide = MappingUtils

1932

.findMappingsForSequence(proteinProduct, mappings);

1933

for (AlignedCodonFrame acf : mappingsToPeptide)

1934

{

1935

for (SequenceToSequenceMapping map : acf.getMappings())

1936

{

1937

Mapping mapping = map.getMapping();

1938

if (mapping != aMapping

1939

&& mapping.getMap().getFromRatio() == CODON_LENGTH

1940

&& proteinProduct == mapping.getTo()

1941

&& seqDss != map.getFromSeq())

1942

{

1943

mappedFromLength = MappingUtils

1944

.getLength(mapping.getMap().getFromRanges());

1945

if (mappedFromLength == map.getFromSeq().getLength())

1946

{

1947

1948

* found a 3:1 mapping to the protein product which covers

1949

* the whole dna sequence i.e. is from CDS; finally check the CDS

1950

* is mapped from the given dna start sequence

1951

1952

SequenceI cdsSeq = map.getFromSeq();

1953

// todo this test is weak if seqMappings contains multiple mappings;

1954

// we get away with it if transcript:cds relationship is 1:1

1955

List<AlignedCodonFrame> dnaToCdsMaps = MappingUtils

1956

.findMappingsForSequence(cdsSeq, seqMappings);

1957

if (!dnaToCdsMaps.isEmpty())

{

return cdsSeq;

}

}

}

}

}

return null;

}

/**

* Helper method that makes a CDS sequence as defined by the mappings from the

1970

* given sequence i.e. extracts the 'mapped from' ranges (which may be on

1971

* forward or reverse strand).

* @param seq

* @param mapping

* @param dataset

* - existing dataset. We check for sequences that look like the CDS

1977

* we are about to construct, if one exists already, then we will

1978

* just return that one.

1979

* @return CDS sequence (as a dataset sequence)

1980

1981

static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping,

1982

AlignmentI dataset)

1983

{

1984

char[] seqChars = seq.getSequence();

1985

List<int[]> fromRanges = mapping.getMap().getFromRanges();

1986

int cdsWidth = MappingUtils.getLength(fromRanges);

1987

char[] newSeqChars = new char[cdsWidth];

1988

1989

int newPos = 0;

1990

for (int[] range : fromRanges)

1991

{

1992

if (range[0] <= range[1])

1993

{

1994

// forward strand mapping - just copy the range

1995

int length = range[1] - range[0] + 1;

1996

System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos,

length);

newPos += length;

}

else

{

// reverse strand mapping - copy and complement one by one

2003

for (int i = range[0]; i >= range[1]; i--)

2004

{

2005

newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]);

}

}

}

* assign 'from id' held in the mapping if set (e.g. EMBL protein_id),

2012

* else generate a sequence name

2013

2014

String mapFromId = mapping.getMappedFromId();

2015

String seqId = "CDS|" + (mapFromId != null ? mapFromId : seq.getName());

2016

SequenceI newSeq = new Sequence(seqId, newSeqChars, 1, newPos);

2017

if (dataset != null)

2018

{

2019

SequenceI[] matches = dataset.findSequenceMatch(newSeq.getName());

2020

if (matches != null)

2021

{

2022

boolean matched = false;

2023

for (SequenceI mtch : matches)

2024

{

2025

if (mtch.getStart() != newSeq.getStart())

{

continue;

}

if (mtch.getEnd() != newSeq.getEnd())

{

continue;

}

if (!Arrays.equals(mtch.getSequence(), newSeq.getSequence()))

{

continue;

}

if (!matched)

{

matched = true;

newSeq = mtch;

}

else

{

System.err.println(

"JAL-2154 regression: warning - found (and ignnored a duplicate CDS sequence):"

+ mtch.toString());

}

}

}

}

// newSeq.setDescription(mapFromId);

return newSeq;

}

/**

* Adds any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to

* the given mapping.

* @param cdsSeq

* @param contig

* @param proteinProduct

2063

* @param mapping

2064

* @return list of DBRefEntrys added

2065

2066

protected static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,

2067

SequenceI contig, SequenceI proteinProduct, Mapping mapping)

2068

{

2069

2070

// gather direct refs from contig congruent with mapping

2071

List<DBRefEntry> direct = new ArrayList<>();

2072

HashSet<String> directSources = new HashSet<>();

2073

2074

if (contig.getDBRefs() != null)

2075

{

2076

for (DBRefEntry dbr : contig.getDBRefs())

2077

{

2078

if (dbr.hasMap() && dbr.getMap().getMap().isTripletMap())

2079

{

2080

MapList map = dbr.getMap().getMap();

2081

// check if map is the CDS mapping

2082

if (mapping.getMap().equals(map))

2083

{

2084

direct.add(dbr);

2085

directSources.add(dbr.getSource());

}

}

}

}

DBRefEntry[] onSource = DBRefUtils.selectRefs(

2091

proteinProduct.getDBRefs(),

2092

directSources.toArray(new String[0]));

2093

List<DBRefEntry> propagated = new ArrayList<>();

2094

2095

// and generate appropriate mappings

2096

for (DBRefEntry cdsref : direct)

2097

{

2098

// clone maplist and mapping

2099

MapList cdsposmap = new MapList(

2100

Arrays.asList(new int[][]

2101

{ new int[] { cdsSeq.getStart(), cdsSeq.getEnd() } }),

2102

cdsref.getMap().getMap().getToRanges(), 3, 1);

2103

Mapping cdsmap = new Mapping(cdsref.getMap().getTo(),

2104

cdsref.getMap().getMap());

2105

2106

// create dbref

2107

DBRefEntry newref = new DBRefEntry(cdsref.getSource(),

2108

cdsref.getVersion(), cdsref.getAccessionId(),

2109

new Mapping(cdsmap.getTo(), cdsposmap));

2110

2111

// and see if we can map to the protein product for this mapping.

2112

// onSource is the filtered set of accessions on protein that we are

2113

// tranferring, so we assume accession is the same.

2114

if (cdsmap.getTo() == null && onSource != null)

2115

{

2116

List<DBRefEntry> sourceRefs = DBRefUtils.searchRefs(onSource,

2117

cdsref.getAccessionId());

2118

if (sourceRefs != null)

2119

{

2120

for (DBRefEntry srcref : sourceRefs)

2121

{

2122

if (srcref.getSource().equalsIgnoreCase(cdsref.getSource()))

2123

{

2124

// we have found a complementary dbref on the protein product, so

2125

// update mapping's getTo

2126

newref.getMap().setTo(proteinProduct);

}

}

}

}

cdsSeq.addDBRef(newref);

2132

propagated.add(newref);

}

return propagated;

}

/**

* Transfers co-located features on 'fromSeq' to 'toSeq', adjusting the

2139

* feature start/end ranges, optionally omitting specified feature types.

2140

* Returns the number of features copied.

* @param fromSeq

* @param toSeq

* @param mapping

* the mapping from 'fromSeq' to 'toSeq'

2146

* @param select

2147

* if not null, only features of this type are copied (including

2148

* subtypes in the Sequence Ontology)

2149

* @param omitting

2150

2151

protected static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,

2152

MapList mapping, String select, String... omitting)

2153

{

2154

SequenceI copyTo = toSeq;

2155

while (copyTo.getDatasetSequence() != null)

2156

{

2157

copyTo = copyTo.getDatasetSequence();

}

* get features, optionally restricted by an ontology term

2162

2163

List<SequenceFeature> sfs = select == null ? fromSeq.getFeatures()

2164

.getPositionalFeatures() : fromSeq.getFeatures()

2165

.getFeaturesByOntology(select);

2166

2167

int count = 0;

2168

for (SequenceFeature sf : sfs)

2169

{

2170

String type = sf.getType();

2171

boolean omit = false;

2172

for (String toOmit : omitting)

2173

{

2174

if (type.equals(toOmit))

{

omit = true;

}

}

if (omit)

{

continue;

}

* locate the mapped range - null if either start or end is

2186

* not mapped (no partial overlaps are calculated)

2187

2188

int start = sf.getBegin();

2189

int end = sf.getEnd();

2190

int[] mappedTo = mapping.locateInTo(start, end);

2191

2192

* if whole exon range doesn't map, try interpreting it

2193

* as 5' or 3' exon overlapping the CDS range

2194

2195

if (mappedTo == null)

2196

{

2197

mappedTo = mapping.locateInTo(end, end);

2198

if (mappedTo != null)

2199

{

2200

2201

* end of exon is in CDS range - 5' overlap

2202

* to a range from the start of the peptide

mappedTo[0] = 1;

}

}

if (mappedTo == null)

2208

{

2209

mappedTo = mapping.locateInTo(start, start);

2210

if (mappedTo != null)

2211

{

2212

2213

* start of exon is in CDS range - 3' overlap

2214

* to a range up to the end of the peptide

2215

2216

mappedTo[1] = toSeq.getLength();

2217

}

2218

}

2219

if (mappedTo != null)

2220

{

2221

int newBegin = Math.min(mappedTo[0], mappedTo[1]);

2222

int newEnd = Math.max(mappedTo[0], mappedTo[1]);

2223

SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd,

2224

sf.getFeatureGroup(), sf.getScore());

2225

copyTo.addSequenceFeature(copy);

count++;

}

}

return count;

}

/**

* Returns a mapping from dna to protein by inspecting sequence features of

2234

* type "CDS" on the dna. A mapping is constructed if the total CDS feature

2235

* length is 3 times the peptide length (optionally after dropping a trailing

2236

* stop codon). This method does not check whether the CDS nucleotide sequence

2237

* translates to the peptide sequence.

* @param dnaSeq

* @param proteinSeq

* @return

public static MapList mapCdsToProtein(SequenceI dnaSeq,

2244

SequenceI proteinSeq)

2245

{

2246

List<int[]> ranges = findCdsPositions(dnaSeq);

2247

int mappedDnaLength = MappingUtils.getLength(ranges);

2248

2249

2250

* if not a whole number of codons, truncate mapping

2251

2252

int codonRemainder = mappedDnaLength % CODON_LENGTH;

2253

if (codonRemainder > 0)

2254

{

2255

mappedDnaLength -= codonRemainder;

2256

MappingUtils.removeEndPositions(codonRemainder, ranges);

2257

}

2258

2259

int proteinLength = proteinSeq.getLength();

2260

int proteinStart = proteinSeq.getStart();

2261

int proteinEnd = proteinSeq.getEnd();

2262

2263

2264

* incomplete start codon may mean X at start of peptide

2265

* we ignore both for mapping purposes

2266

2267

if (proteinSeq.getCharAt(0) == 'X')

2268

{

2269

// todo JAL-2022 support startPhase > 0

proteinStart++;

proteinLength--;

}

List<int[]> proteinRange = new ArrayList<>();

2274

2275

2276

* dna length should map to protein (or protein plus stop codon)

2277

2278

int codesForResidues = mappedDnaLength / CODON_LENGTH;

2279

if (codesForResidues == (proteinLength + 1))

2280

{

2281

// assuming extra codon is for STOP and not in peptide

2282

// todo: check trailing codon is indeed a STOP codon

2283

codesForResidues--;

2284

mappedDnaLength -= CODON_LENGTH;

2285

MappingUtils.removeEndPositions(CODON_LENGTH, ranges);

2286

}

2287

2288

if (codesForResidues == proteinLength)

2289

{

2290

proteinRange.add(new int[] { proteinStart, proteinEnd });

2291

return new MapList(ranges, proteinRange, CODON_LENGTH, 1);

}

return null;

}

/**

* Returns a list of CDS ranges found (as sequence positions base 1), i.e. of

2298

* [start, end] positions of sequence features of type "CDS" (or a sub-type of

2299

* CDS in the Sequence Ontology). The ranges are sorted into ascending start

2300

* position order, so this method is only valid for linear CDS in the same

2301

* sense as the protein product.

* @param dnaSeq

* @return

protected static List<int[]> findCdsPositions(SequenceI dnaSeq)

2307

{

2308

List<int[]> result = new ArrayList<>();

2309

2310

List<SequenceFeature> sfs = dnaSeq.getFeatures().getFeaturesByOntology(

2311

SequenceOntologyI.CDS);

if (sfs.isEmpty())

{

return result;

}

SequenceFeatures.sortFeatures(sfs, true);

2317

2318

for (SequenceFeature sf : sfs)

{

int phase = 0;

try

{

phase = Integer.parseInt(sf.getPhase());

2324

} catch (NumberFormatException e)

{

// ignore

}

* phase > 0 on first codon means 5' incomplete - skip to the start

2330

* of the next codon; example ENST00000496384

2331

2332

int begin = sf.getBegin();

2333

int end = sf.getEnd();

2334

if (result.isEmpty() && phase > 0)

{

begin += phase;

if (begin > end)

{

// shouldn't happen!

System.err

.println("Error: start phase extends beyond start CDS in "

+ dnaSeq.getName());

}

}

result.add(new int[] { begin, end });

}

* Finally sort ranges by start position. This avoids a dependency on

2350

* keeping features in order on the sequence (if they are in order anyway,

2351

* the sort will have almost no work to do). The implicit assumption is CDS

2352

* ranges are assembled in order. Other cases should not use this method,

2353

* but instead construct an explicit mapping for CDS (e.g. EMBL parsing).

2354

2355

Collections.sort(result, IntRangeComparator.ASCENDING);

return result;

}

/**

* Maps exon features from dna to protein, and computes variants in peptide

2361

* product generated by variants in dna, and adds them as sequence_variant

2362

* features on the protein sequence. Returns the number of variant features

* added.

* @param dnaSeq

* @param peptide

* @param dnaToProtein

2368

2369

public static int computeProteinFeatures(SequenceI dnaSeq,

2370

SequenceI peptide, MapList dnaToProtein)

2371

{

2372

while (dnaSeq.getDatasetSequence() != null)

2373

{

2374

dnaSeq = dnaSeq.getDatasetSequence();

2375

}

2376

while (peptide.getDatasetSequence() != null)

2377

{

2378

peptide = peptide.getDatasetSequence();

2379

}

2380

2381

transferFeatures(dnaSeq, peptide, dnaToProtein, SequenceOntologyI.EXON);

2382

2383

2384

* compute protein variants from dna variants and codon mappings;

2385

* NB - alternatively we could retrieve this using the REST service e.g.

2386

* http://rest.ensembl.org/overlap/translation

2387

* /ENSP00000288602?feature=transcript_variation;content-type=text/xml

2388

* which would be a bit slower but possibly more reliable

* build a map with codon variations for each potentially varying peptide

2393

2394

LinkedHashMap<Integer, List<DnaVariant>[]> variants = buildDnaVariantsMap(

2395

dnaSeq, dnaToProtein);

2396

2397

2398

* scan codon variations, compute peptide variants and add to peptide sequence

2399

2400

int count = 0;

2401

for (Entry<Integer, List<DnaVariant>[]> variant : variants.entrySet())

2402

{

2403

int peptidePos = variant.getKey();

2404

List<DnaVariant>[] codonVariants = variant.getValue();

2405

count += computePeptideVariants(peptide, peptidePos, codonVariants);

}

return count;

}

/**

* Computes non-synonymous peptide variants from codon variants and adds them

2413

* as sequence_variant features on the protein sequence (one feature per

2414

* allele variant). Selected attributes (variant id, clinical significance)

2415

* are copied over to the new features.

2416

2417

* @param peptide

2418

* the protein sequence

2419

* @param peptidePos

2420

* the position to compute peptide variants for

2421

* @param codonVariants

2422

* a list of dna variants per codon position

2423

* @return the number of features added

2424

2425

static int computePeptideVariants(SequenceI peptide, int peptidePos,

2426

List<DnaVariant>[] codonVariants)

2427

{

2428

String residue = String.valueOf(peptide.getCharAt(peptidePos - 1));

2429

int count = 0;

2430

String base1 = codonVariants[0].get(0).base;

2431

String base2 = codonVariants[1].get(0).base;

2432

String base3 = codonVariants[2].get(0).base;

2433

2434

2435

* variants in first codon base

2436

2437

for (DnaVariant dnavar : codonVariants[0])

2438

{

2439

if (dnavar.variant != null)

2440

{

2441

String alleles = (String) dnavar.variant.getValue(Gff3Helper.ALLELES);

2442

if (alleles != null)

2443

{

2444

for (String base : alleles.split(","))

2445

{

2446

if (!base1.equalsIgnoreCase(base))

2447

{

2448

String codon = base.toUpperCase() + base2.toLowerCase()

2449

+ base3.toLowerCase();

2450

String canonical = base1.toUpperCase() + base2.toLowerCase()

2451

+ base3.toLowerCase();

2452

if (addPeptideVariant(peptide, peptidePos, residue, dnavar,

codon, canonical))

{

count++;

}

}

}

}

}

}

* variants in second codon base

2465

2466

for (DnaVariant var : codonVariants[1])

2467

{

2468

if (var.variant != null)

2469

{

2470

String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES);

2471

if (alleles != null)

2472

{

2473

for (String base : alleles.split(","))

2474

{

2475

if (!base2.equalsIgnoreCase(base))

2476

{

2477

String codon = base1.toLowerCase() + base.toUpperCase()

2478

+ base3.toLowerCase();

2479

String canonical = base1.toLowerCase() + base2.toUpperCase()

2480

+ base3.toLowerCase();

2481

if (addPeptideVariant(peptide, peptidePos, residue, var,

codon, canonical))

{

count++;

}

}

}

}

}

}

* variants in third codon base

2494

2495

for (DnaVariant var : codonVariants[2])

2496

{

2497

if (var.variant != null)

2498

{

2499

String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES);

2500

if (alleles != null)

2501

{

2502

for (String base : alleles.split(","))

2503

{

2504

if (!base3.equalsIgnoreCase(base))

2505

{

2506

String codon = base1.toLowerCase() + base2.toLowerCase()

2507

+ base.toUpperCase();

2508

String canonical = base1.toLowerCase() + base2.toLowerCase()

2509

+ base3.toUpperCase();

2510

if (addPeptideVariant(peptide, peptidePos, residue, var,

codon, canonical))

{

count++;

}

}

}

}

}

}

return count;

}

/**

* Helper method that adds a peptide variant feature. ID and

2526

* clinical_significance attributes of the dna variant (if present) are copied

2527

* to the new feature.

* @param peptide

* @param peptidePos

* @param residue

* @param var

* @param codon

* the variant codon e.g. aCg

2535

* @param canonical

2536

* the 'normal' codon e.g. aTg

2537

* @return true if a feature was added, else false

2538

2539

static boolean addPeptideVariant(SequenceI peptide, int peptidePos,

2540

String residue, DnaVariant var, String codon, String canonical)

2541

{

2542

2543

* get peptide translation of codon e.g. GAT -> D

2544

* note that variants which are not single alleles,

2545

* e.g. multibase variants or HGMD_MUTATION etc

2546

* are currently ignored here

2547

2548

String trans = codon.contains("-") ? null

2549

: (codon.length() > CODON_LENGTH ? null

2550

: ResidueProperties.codonTranslate(codon));

if (trans == null)

{

return false;

}

String desc = canonical + "/" + codon;

2556

String featureType = "";

2557

if (trans.equals(residue))

2558

{

2559

featureType = SequenceOntologyI.SYNONYMOUS_VARIANT;

2560

}

2561

else if (ResidueProperties.STOP.equals(trans))

2562

{

2563

featureType = SequenceOntologyI.STOP_GAINED;

}

else

{

String residue3Char = StringUtils

2568

.toSentenceCase(ResidueProperties.aa2Triplet.get(residue));

2569

String trans3Char = StringUtils

2570

.toSentenceCase(ResidueProperties.aa2Triplet.get(trans));

2571

desc = "p." + residue3Char + peptidePos + trans3Char;

2572

featureType = SequenceOntologyI.NONSYNONYMOUS_VARIANT;

2573

}

2574

SequenceFeature sf = new SequenceFeature(featureType, desc, peptidePos,

2575

peptidePos, var.getSource());

2576

2577

StringBuilder attributes = new StringBuilder(32);

2578

String id = (String) var.variant.getValue(ID);

2579

if (id != null)

2580

{

2581

if (id.startsWith(SEQUENCE_VARIANT))

2582

{

2583

id = id.substring(SEQUENCE_VARIANT.length());

2584

}

2585

sf.setValue(ID, id);

2586

attributes.append(ID).append("=").append(id);

2587

// TODO handle other species variants JAL-2064

2588

StringBuilder link = new StringBuilder(32);

2589

try

2590

{

2591

link.append(desc).append(" ").append(id).append(

2592

"|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=")

2593

.append(URLEncoder.encode(id, "UTF-8"));

2594

sf.addLink(link.toString());

2595

} catch (UnsupportedEncodingException e)

{

// as if

}

}

String clinSig = (String) var.variant.getValue(CLINICAL_SIGNIFICANCE);

2601

if (clinSig != null)

2602

{

2603

sf.setValue(CLINICAL_SIGNIFICANCE, clinSig);

2604

attributes.append(";").append(CLINICAL_SIGNIFICANCE).append("=")

2605

.append(clinSig);

2606

}

2607

peptide.addSequenceFeature(sf);

2608

if (attributes.length() > 0)

2609

{

2610

sf.setAttributes(attributes.toString());

}

return true;

}

/**

* Builds a map whose key is position in the protein sequence, and value is a

2617

* list of the base and all variants for each corresponding codon position.

2618

* <p>

2619

* This depends on dna variants being held as a comma-separated list as

2620

* property "alleles" on variant features.

2621

2622

* @param dnaSeq

2623

* @param dnaToProtein

2624

* @return

2625

2626

@SuppressWarnings("unchecked")

2627

static LinkedHashMap<Integer, List<DnaVariant>[]> buildDnaVariantsMap(

2628

SequenceI dnaSeq, MapList dnaToProtein)

2629

{

2630

2631

* map from peptide position to all variants of the codon which codes for it

2632

* LinkedHashMap ensures we keep the peptide features in sequence order

2633

2634

LinkedHashMap<Integer, List<DnaVariant>[]> variants = new LinkedHashMap<>();

2635

2636

List<SequenceFeature> dnaFeatures = dnaSeq.getFeatures()

2637

.getFeaturesByOntology(SequenceOntologyI.SEQUENCE_VARIANT);

2638

if (dnaFeatures.isEmpty())

{

return variants;

}

int dnaStart = dnaSeq.getStart();

2644

int[] lastCodon = null;

2645

int lastPeptidePostion = 0;

2646

2647

2648

* build a map of codon variations for peptides

2649

2650

for (SequenceFeature sf : dnaFeatures)

2651

{

2652

int dnaCol = sf.getBegin();

2653

if (dnaCol != sf.getEnd())

2654

{

2655

// not handling multi-locus variant features

continue;

}

* ignore variant if not a SNP

2661

2662

String alls = (String) sf.getValue(Gff3Helper.ALLELES);

2663

if (alls == null)

2664

{

2665

continue; // non-SNP VCF variant perhaps - can't process this

2666

}

2667

2668

String[] alleles = alls.toUpperCase().split(",");

2669

boolean isSnp = true;

2670

for (String allele : alleles)

2671

{

2672

if (allele.trim().length() > 1)

{

isSnp = false;

}

}

if (!isSnp)

{

continue;

}

int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol);

2683

if (mapsTo == null)

2684

{

2685

// feature doesn't lie within coding region

2686

continue;

2687

}

2688

int peptidePosition = mapsTo[0];

2689

List<DnaVariant>[] codonVariants = variants.get(peptidePosition);

2690

if (codonVariants == null)

2691

{

2692

codonVariants = new ArrayList[CODON_LENGTH];

2693

codonVariants[0] = new ArrayList<>();

2694

codonVariants[1] = new ArrayList<>();

2695

codonVariants[2] = new ArrayList<>();

2696

variants.put(peptidePosition, codonVariants);

}

* get this peptide's codon positions e.g. [3, 4, 5] or [4, 7, 10]

2701

2702

int[] codon = peptidePosition == lastPeptidePostion ? lastCodon

2703

: MappingUtils.flattenRanges(dnaToProtein.locateInFrom(

2704

peptidePosition, peptidePosition));

2705

lastPeptidePostion = peptidePosition;

lastCodon = codon;

* save nucleotide (and any variant) for each codon position

2710

2711

for (int codonPos = 0; codonPos < CODON_LENGTH; codonPos++)

2712

{

2713

String nucleotide = String.valueOf(

2714

dnaSeq.getCharAt(codon[codonPos] - dnaStart)).toUpperCase();

2715

List<DnaVariant> codonVariant = codonVariants[codonPos];

2716

if (codon[codonPos] == dnaCol)

2717

{

2718

if (!codonVariant.isEmpty()

2719

&& codonVariant.get(0).variant == null)

2720

{

2721

2722

* already recorded base value, add this variant

2723

2724

codonVariant.get(0).variant = sf;

}

else

{

* add variant with base value

2730

2731

codonVariant.add(new DnaVariant(nucleotide, sf));

2732

}

2733

}

2734

else if (codonVariant.isEmpty())

2735

{

2736

2737

* record (possibly non-varying) base value

2738

2739

codonVariant.add(new DnaVariant(nucleotide));

}

}

}

return variants;

}

/**

* Makes an alignment with a copy of the given sequences, adding in any

2748

* non-redundant sequences which are mapped to by the cross-referenced

* sequences.

* @param seqs

* @param xrefs

* @param dataset

* the alignment dataset shared by the new copy

2755

* @return

2756

2757

public static AlignmentI makeCopyAlignment(SequenceI[] seqs,

2758

SequenceI[] xrefs, AlignmentI dataset)

2759

{

2760

AlignmentI copy = new Alignment(new Alignment(seqs));

2761

copy.setDataset(dataset);

2762

boolean isProtein = !copy.isNucleotide();

2763

SequenceIdMatcher matcher = new SequenceIdMatcher(seqs);

2764

if (xrefs != null)

2765

{

2766

for (SequenceI xref : xrefs)

2767

{

2768

DBRefEntry[] dbrefs = xref.getDBRefs();

2769

if (dbrefs != null)

2770

{

2771

for (DBRefEntry dbref : dbrefs)

2772

{

2773

if (dbref.getMap() == null || dbref.getMap().getTo() == null

2774

|| dbref.getMap().getTo().isProtein() != isProtein)

{

continue;

}

SequenceI mappedTo = dbref.getMap().getTo();

2779

SequenceI match = matcher.findIdMatch(mappedTo);

2780

if (match == null)

2781

{

2782

matcher.add(mappedTo);

2783

copy.addSequence(mappedTo);

}

}

}

}

}

return copy;

}

/**

* Try to align sequences in 'unaligned' to match the alignment of their

2794

* mapped regions in 'aligned'. For example, could use this to align CDS

2795

* sequences which are mapped to their parent cDNA sequences.

2796

2797

* This method handles 1:1 mappings (dna-to-dna or protein-to-protein). For

2798

* dna-to-protein or protein-to-dna use alternative methods.

2799

2800

* @param unaligned

2801

* sequences to be aligned

2802

* @param aligned

2803

* holds aligned sequences and their mappings

2804

* @return

2805

2806

public static int alignAs(AlignmentI unaligned, AlignmentI aligned)

2807

{

2808

2809

* easy case - aligning a copy of aligned sequences

2810

2811

if (alignAsSameSequences(unaligned, aligned))

2812

{

2813

return unaligned.getHeight();

}

* fancy case - aligning via mappings between sequences

2818

2819

List<SequenceI> unmapped = new ArrayList<>();

2820

Map<Integer, Map<SequenceI, Character>> columnMap = buildMappedColumnsMap(

2821

unaligned, aligned, unmapped);

2822

int width = columnMap.size();

2823

char gap = unaligned.getGapCharacter();

2824

int realignedCount = 0;

2825

// TODO: verify this loop scales sensibly for very wide/high alignments

2826

2827

for (SequenceI seq : unaligned.getSequences())

2828

{

2829

if (!unmapped.contains(seq))

2830

{

2831

char[] newSeq = new char[width];

2832

Arrays.fill(newSeq, gap); // JBPComment - doubt this is faster than the

2833

// Integer iteration below

int newCol = 0;

int lastCol = 0;

* traverse the map to find columns populated

2839

* by our sequence

2840

2841

for (Integer column : columnMap.keySet())

2842

{

2843

Character c = columnMap.get(column).get(seq);

if (c != null)

{

* sequence has a character at this position

newSeq[newCol] = c;

lastCol = newCol;

}

newCol++;

}

* trim trailing gaps

if (lastCol < width)

{

char[] tmp = new char[lastCol + 1];

2862

System.arraycopy(newSeq, 0, tmp, 0, lastCol + 1);

2863

newSeq = tmp;

2864

}

2865

// TODO: optimise SequenceI to avoid char[]->String->char[]

2866

seq.setSequence(String.valueOf(newSeq));

realignedCount++;

}

}

return realignedCount;

}

/**

* If unaligned and aligned sequences share the same dataset sequences, then

2875

* simply copies the aligned sequences to the unaligned sequences and returns

2876

* true; else returns false

2877

2878

* @param unaligned

2879

* - sequences to be aligned based on aligned

2880

* @param aligned

2881

* - 'guide' alignment containing sequences derived from same dataset

* as unaligned

* @return

static boolean alignAsSameSequences(AlignmentI unaligned,

2886

AlignmentI aligned)

2887

{

2888

if (aligned.getDataset() == null || unaligned.getDataset() == null)

2889

{

2890

return false; // should only pass alignments with datasets here

2891

}

2892

2893

// map from dataset sequence to alignment sequence(s)

2894

Map<SequenceI, List<SequenceI>> alignedDatasets = new HashMap<>();

2895

for (SequenceI seq : aligned.getSequences())

2896

{

2897

SequenceI ds = seq.getDatasetSequence();

2898

if (alignedDatasets.get(ds) == null)

2899

{

2900

alignedDatasets.put(ds, new ArrayList<SequenceI>());

2901

}

2902

alignedDatasets.get(ds).add(seq);

}

* first pass - check whether all sequences to be aligned share a dataset

2907

* sequence with an aligned sequence

2908

2909

for (SequenceI seq : unaligned.getSequences())

2910

{

2911

if (!alignedDatasets.containsKey(seq.getDatasetSequence()))

{

return false;

}

}

* second pass - copy aligned sequences;

2919

* heuristic rule: pair off sequences in order for the case where

2920

* more than one shares the same dataset sequence

2921

2922

for (SequenceI seq : unaligned.getSequences())

2923

{

2924

List<SequenceI> alignedSequences = alignedDatasets

2925

.get(seq.getDatasetSequence());

2926

// TODO: getSequenceAsString() will be deprecated in the future

2927

// TODO: need to leave to SequenceI implementor to update gaps

2928

seq.setSequence(alignedSequences.get(0).getSequenceAsString());

2929

if (alignedSequences.size() > 0)

2930

{

2931

// pop off aligned sequences (except the last one)

2932

alignedSequences.remove(0);

}

}

return true;

}

/**

* Returns a map whose key is alignment column number (base 1), and whose

2941

* values are a map of sequence characters in that column.

* @param unaligned

* @param aligned

* @param unmapped

* @return

static SortedMap<Integer, Map<SequenceI, Character>> buildMappedColumnsMap(

2949

AlignmentI unaligned, AlignmentI aligned,

2950

List<SequenceI> unmapped)

2951

{

2952

2953

* Map will hold, for each aligned column position, a map of

2954

* {unalignedSequence, characterPerSequence} at that position.

2955

* TreeMap keeps the entries in ascending column order.

2956

2957

SortedMap<Integer, Map<SequenceI, Character>> map = new TreeMap<>();

2958

2959

2960

* record any sequences that have no mapping so can't be realigned

2961

2962

unmapped.addAll(unaligned.getSequences());

2963

2964

List<AlignedCodonFrame> mappings = aligned.getCodonFrames();

2965

2966

for (SequenceI seq : unaligned.getSequences())

2967

{

2968

for (AlignedCodonFrame mapping : mappings)

2969

{

2970

SequenceI fromSeq = mapping.findAlignedSequence(seq, aligned);

2971

if (fromSeq != null)

2972

{

2973

Mapping seqMap = mapping.getMappingBetween(fromSeq, seq);

2974

if (addMappedPositions(seq, fromSeq, seqMap, map))

2975

{

2976

unmapped.remove(seq);

}

}

}

}

return map;

}

/**

* Helper method that adds to a map the mapped column positions of a sequence.

2986

* <br>

2987

* For example if aaTT-Tg-gAAA is mapped to TTTAAA then the map should record

2988

* that columns 3,4,6,10,11,12 map to characters T,T,T,A,A,A of the mapped to

* sequence.

* @param seq

* the sequence whose column positions we are recording

2993

* @param fromSeq

2994

* a sequence that is mapped to the first sequence

2995

* @param seqMap

2996

* the mapping from 'fromSeq' to 'seq'

2997

* @param map

2998

* a map to add the column positions (in fromSeq) of the mapped

* positions of seq

* @return

static boolean addMappedPositions(SequenceI seq, SequenceI fromSeq,

3003

Mapping seqMap, Map<Integer, Map<SequenceI, Character>> map)

{

if (seqMap == null)

{

return false;

}

* invert mapping if it is from unaligned to aligned sequence

3012

3013

if (seqMap.getTo() == fromSeq.getDatasetSequence())

3014

{

3015

seqMap = new Mapping(seq.getDatasetSequence(),

3016

seqMap.getMap().getInverse());

3017

}

3018

3019

int toStart = seq.getStart();

3020

3021

3022

* traverse [start, end, start, end...] ranges in fromSeq

3023

3024

for (int[] fromRange : seqMap.getMap().getFromRanges())

3025

{

3026

for (int i = 0; i < fromRange.length - 1; i += 2)

3027

{

3028

boolean forward = fromRange[i + 1] >= fromRange[i];

3029

3030

3031

* find the range mapped to (sequence positions base 1)

3032

3033

int[] range = seqMap.locateMappedRange(fromRange[i],

fromRange[i + 1]);

if (range == null)

{

System.err.println("Error in mapping " + seqMap + " from "

3038

+ fromSeq.getName());

3039

return false;

3040

}

3041

int fromCol = fromSeq.findIndex(fromRange[i]);

3042

int mappedCharPos = range[0];

3043

3044

3045

* walk over the 'from' aligned sequence in forward or reverse

3046

* direction; when a non-gap is found, record the column position

3047

* of the next character of the mapped-to sequence; stop when all

3048

* the characters of the range have been counted

3049

3050

while (mappedCharPos <= range[1] && fromCol <= fromSeq.getLength()

3051

&& fromCol >= 0)

3052

{

3053

if (!Comparison.isGap(fromSeq.getCharAt(fromCol - 1)))

3054

{

3055

3056

* mapped from sequence has a character in this column

3057

* record the column position for the mapped to character

3058

3059

Map<SequenceI, Character> seqsMap = map.get(fromCol);

3060

if (seqsMap == null)

3061

{

3062

seqsMap = new HashMap<>();

3063

map.put(fromCol, seqsMap);

3064

}

3065

seqsMap.put(seq, seq.getCharAt(mappedCharPos - toStart));

3066

mappedCharPos++;

3067

}

3068

fromCol += (forward ? 1 : -1);

}

}

}

return true;

}

// strictly temporary hack until proper criteria for aligning protein to cds

3076

// are in place; this is so Ensembl -> fetch xrefs Uniprot aligns the Uniprot

3077

public static boolean looksLikeEnsembl(AlignmentI alignment)

3078

{

3079

for (SequenceI seq : alignment.getSequences())

3080

{

3081

String name = seq.getName();

3082

if (!name.startsWith("ENSG") && !name.startsWith("ENST"))

{

return false;

}

}

return true;

}

}

jalviewX

File AlignmentUtils.java

Coverage histogram

Code metrics

Classes

Class AlignmentUtils

Class AlignmentUtils.DnaVariant

Contributing tests

Contributing tests

Source view