Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
EnsemblCdna | 40 | 17 | 11 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.ext.ensembl; | |
22 | ||
23 | import jalview.datamodel.SequenceFeature; | |
24 | import jalview.datamodel.SequenceI; | |
25 | import jalview.io.gff.SequenceOntologyI; | |
26 | ||
27 | import java.util.ArrayList; | |
28 | import java.util.List; | |
29 | ||
30 | import com.stevesoft.pat.Regex; | |
31 | ||
32 | /** | |
33 | * A client to fetch CDNA sequence from Ensembl (i.e. that part of the genomic | |
34 | * sequence that is transcribed to RNA, but not necessarily translated to | |
35 | * protein) | |
36 | * | |
37 | * @author gmcarstairs | |
38 | * | |
39 | */ | |
40 | public class EnsemblCdna extends EnsemblSeqProxy | |
41 | { | |
42 | /* | |
43 | * accepts ENST or ENSG followed by 11 digits | |
44 | * or ENSMUST or similar (three-letter species code) for other species | |
45 | * or CCDS followed by at least 3 digits and/or dots (e.g. CCDSnnnnn.nn) | |
46 | */ | |
47 | private static final Regex ACCESSION_REGEX = new Regex( | |
48 | "(ENS([A-Z]{3}|)[TG][0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)"); | |
49 | ||
50 | /* | |
51 | * fetch exon features on genomic sequence (to identify the cdna regions) | |
52 | * and cds and variation features (to retain) | |
53 | */ | |
54 | private static final EnsemblFeatureType[] FEATURES_TO_FETCH = { | |
55 | EnsemblFeatureType.exon, EnsemblFeatureType.cds, | |
56 | EnsemblFeatureType.variation }; | |
57 | ||
58 | /** | |
59 | * Default constructor (to use rest.ensembl.org) | |
60 | */ | |
61 | 10 | public EnsemblCdna() |
62 | { | |
63 | 10 | super(); |
64 | } | |
65 | ||
66 | /** | |
67 | * Constructor given the target domain to fetch data from | |
68 | * | |
69 | * @param d the target domain to fetch data from | |
70 | */ | |
71 | 0 | public EnsemblCdna(String d) |
72 | { | |
73 | 0 | super(d); |
74 | } | |
75 | ||
76 | 0 | @Override |
77 | public String getDbName() | |
78 | { | |
79 | 0 | return "ENSEMBL (CDNA)"; |
80 | } | |
81 | ||
82 | 0 | @Override |
83 | protected EnsemblSeqType getSourceEnsemblType() | |
84 | { | |
85 | 0 | return EnsemblSeqType.CDNA; |
86 | } | |
87 | ||
88 | 6 | @Override |
89 | public Regex getAccessionValidator() | |
90 | { | |
91 | 6 | return ACCESSION_REGEX; |
92 | } | |
93 | ||
94 | 0 | @Override |
95 | protected EnsemblFeatureType[] getFeaturesToFetch() | |
96 | { | |
97 | 0 | return FEATURES_TO_FETCH; |
98 | } | |
99 | ||
100 | /** | |
101 | * Answers false if the feature type is 'transcript' (or a sub-type in the | |
102 | * Sequence Ontology); otherwise answers featureMayBelong(sf, accessionId). | |
103 | */ | |
104 | 7 | @Override |
105 | protected boolean retainFeature(SequenceFeature sf, String accessionId) | |
106 | { | |
107 | 7 | if (isTranscript(sf.getType())) |
108 | { | |
109 | 3 | return false; |
110 | } | |
111 | 4 | return featureMayBelong(sf, accessionId); |
112 | } | |
113 | ||
114 | /** | |
115 | * Answers a list of sequence features (if any) whose type is 'exon' (or a | |
116 | * subtype of exon in the Sequence Ontology), and whose Parent is the | |
117 | * transcript we are retrieving | |
118 | */ | |
119 | 4 | @Override |
120 | protected List<SequenceFeature> getIdentifyingFeatures(SequenceI seq, | |
121 | String accId) | |
122 | { | |
123 | 4 | List<SequenceFeature> result = new ArrayList<>(); |
124 | 4 | List<SequenceFeature> sfs = seq.getFeatures() |
125 | .getFeaturesByOntology(SequenceOntologyI.EXON); | |
126 | 4 | for (SequenceFeature sf : sfs) |
127 | { | |
128 | 12 | String parentFeature = (String) sf.getValue(PARENT); |
129 | 12 | if (accId.equals(parentFeature)) |
130 | { | |
131 | 6 | result.add(sf); |
132 | } | |
133 | } | |
134 | ||
135 | 4 | return result; |
136 | } | |
137 | ||
138 | /** | |
139 | * Parameter object_type=Transcript added to ensure cdna and not peptide is | |
140 | * returned (JAL-2529) | |
141 | */ | |
142 | 0 | @Override |
143 | protected String getObjectType() | |
144 | { | |
145 | 0 | return OBJECT_TYPE_TRANSCRIPT; |
146 | } | |
147 | ||
148 | } |