Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
EmblXmlSourceTest | 47 | 231 | 10 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.ws.dbsources; | |
22 | ||
23 | import static org.testng.AssertJUnit.assertEquals; | |
24 | import static org.testng.AssertJUnit.assertNotNull; | |
25 | import static org.testng.AssertJUnit.assertNull; | |
26 | import static org.testng.AssertJUnit.assertSame; | |
27 | import static org.testng.AssertJUnit.assertTrue; | |
28 | ||
29 | import java.io.ByteArrayInputStream; | |
30 | import java.util.ArrayList; | |
31 | import java.util.Arrays; | |
32 | import java.util.List; | |
33 | ||
34 | import org.testng.annotations.BeforeClass; | |
35 | import org.testng.annotations.Test; | |
36 | ||
37 | import jalview.datamodel.AlignmentI; | |
38 | import jalview.datamodel.DBRefEntry; | |
39 | import jalview.datamodel.DBRefSource; | |
40 | import jalview.datamodel.SequenceI; | |
41 | import jalview.util.MapList; | |
42 | import jalview.xml.binding.embl.EntryType; | |
43 | import jalview.xml.binding.embl.EntryType.Feature; | |
44 | import jalview.xml.binding.embl.EntryType.Feature.Qualifier; | |
45 | import jalview.xml.binding.embl.XrefType; | |
46 | ||
47 | public class EmblXmlSourceTest | |
48 | { | |
49 | ||
50 | // adapted from http://www.ebi.ac.uk/ena/data/view/X07547&display=xml | |
51 | // dna and translations truncated for convenience | |
52 | static final String TESTDATA = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" | |
53 | + "<ROOT>" | |
54 | + "<entry accession=\"X07547\" version=\"1\" entryVersion=\"8\"" | |
55 | + " dataClass=\"STD\" taxonomicDivision=\"PRO\"" | |
56 | + " moleculeType=\"genomic DNA\" sequenceLength=\"7499\" topology=\"linear\"" | |
57 | + " firstPublic=\"1988-11-10\" firstPublicRelease=\"18\"" | |
58 | + " lastUpdated=\"1999-02-10\" lastUpdatedRelease=\"58\">" | |
59 | + "<secondaryAccession>X07574</secondaryAccession>" | |
60 | + "<description>C. trachomatis plasmid</description>" | |
61 | + "<keyword>plasmid</keyword><keyword>unidentified reading frame</keyword>" | |
62 | + "<xref db=\"EuropePMC\" id=\"PMC107176\" secondaryId=\"9573186\" />" | |
63 | + "<xref db=\"MD5\" id=\"ac73317\" />" | |
64 | /* | |
65 | * first CDS (range and translation changed to keep test data manageable) | |
66 | */ | |
67 | + "<feature name=\"CDS\" location=\"complement(46..57)\">" | |
68 | // test the case of >1 cross-ref to the same database (JAL-2029) | |
69 | + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"B0BCM4\" secondaryId=\"2.1\" />" | |
70 | + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"P0CE20\" />" | |
71 | + "<qualifier name=\"note\"><value>ORF 8 (AA 1-330)</value></qualifier>" | |
72 | + "<qualifier name=\"protein_id\"><value>CAA30420.1</value></qualifier>" | |
73 | + "<qualifier name=\"translation\"><value>MLCF</value></qualifier>" | |
74 | + "</feature>" | |
75 | /* | |
76 | * second CDS (range and translation changed to keep test data manageable) | |
77 | */ | |
78 | + "<feature name=\"CDS\" location=\"4..15\">" | |
79 | + "<xref db=\"UniProtKB/Swiss-Prot\" id=\"B0BCM3\" />" | |
80 | + "<qualifier name=\"protein_id\"><value>CAA30421.1</value></qualifier>" | |
81 | + "<qualifier name=\"translation\"><value>MSSS</value></qualifier>" | |
82 | + "</feature>" | |
83 | /* | |
84 | * third CDS is made up - has no xref - code should synthesize | |
85 | * one to an assumed EMBLCDSPROTEIN accession | |
86 | */ | |
87 | + "<feature name=\"CDS\" location=\"join(4..6,10..15)\">" | |
88 | + "<qualifier name=\"protein_id\"><value>CAA12345.6</value></qualifier>" | |
89 | + "<qualifier name=\"translation\"><value>MSS</value></qualifier>" | |
90 | + "</feature>" | |
91 | /* | |
92 | * sequence (modified for test purposes) | |
93 | * emulates EMBL XML 1.2 which splits sequence data every 60 characters | |
94 | * see EmblSequence.setSequence | |
95 | */ | |
96 | + "<sequence>GGTATGTCCTCTAGTACAAAC\n" | |
97 | + "ACCCCCAATATTGTGATATAATTAAAAACATAGCAT" | |
98 | + "</sequence></entry></ROOT>"; | |
99 | ||
100 | private EmblXmlSource testee; | |
101 | ||
102 | 1 | @BeforeClass(alwaysRun = true) |
103 | public void setUp() | |
104 | { | |
105 | 1 | testee = new EmblXmlSource() |
106 | { | |
107 | ||
108 | 0 | @Override |
109 | public String getDbSource() | |
110 | { | |
111 | 0 | return null; |
112 | } | |
113 | ||
114 | 0 | @Override |
115 | public String getDbName() | |
116 | { | |
117 | 0 | return null; |
118 | } | |
119 | ||
120 | 0 | @Override |
121 | public String getTestQuery() | |
122 | { | |
123 | 0 | return null; |
124 | } | |
125 | ||
126 | 0 | @Override |
127 | public AlignmentI getSequenceRecords(String queries) throws Exception | |
128 | { | |
129 | 0 | return null; |
130 | } | |
131 | }; | |
132 | } | |
133 | ||
134 | 1 | @Test(groups = "Functional") |
135 | public void testGetCdsRanges() | |
136 | { | |
137 | /* | |
138 | * Make a (CDS) Feature with 5 locations | |
139 | */ | |
140 | 1 | Feature cds = new Feature(); |
141 | 1 | cds.setLocation( |
142 | "join(10..20,complement(30..40),50..60,70..80,complement(110..120))"); | |
143 | ||
144 | 1 | int[] exons = testee.getCdsRanges("EMBL", cds); |
145 | 1 | assertEquals("[10, 20, 40, 30, 50, 60, 70, 80, 120, 110]", |
146 | Arrays.toString(exons)); | |
147 | } | |
148 | ||
149 | 1 | @Test(groups = "Functional") |
150 | public void testGetSequence() | |
151 | { | |
152 | // not the whole sequence but enough for this test... | |
153 | 1 | List<SequenceI> peptides = new ArrayList<>(); |
154 | 1 | List<EntryType> entries = getEmblEntries(); |
155 | 1 | assertEquals(1, entries.size()); |
156 | 1 | EntryType entry = entries.get(0); |
157 | 1 | String sourceDb = "EMBL"; |
158 | 1 | SequenceI dna = testee.getSequence(sourceDb, entry, peptides); |
159 | ||
160 | /* | |
161 | * newline has been removed from sequence | |
162 | */ | |
163 | 1 | String seq = dna.getSequenceAsString(); |
164 | 1 | assertEquals( |
165 | "GGTATGTCCTCTAGTACAAACACCCCCAATATTGTGATATAATTAAAAACATAGCAT", | |
166 | seq); | |
167 | ||
168 | /* | |
169 | * peptides should now have five entries: | |
170 | * EMBL product and two Uniprot accessions for the first CDS / translation | |
171 | * EMBL product and one Uniprot accession for the second CDS / " | |
172 | * EMBL product only for the third | |
173 | */ | |
174 | 1 | assertEquals(6, peptides.size()); |
175 | 1 | assertEquals("CAA30420.1", peptides.get(0).getName()); |
176 | 1 | assertEquals("MLCF", peptides.get(0).getSequenceAsString()); |
177 | 1 | assertEquals("UNIPROT|B0BCM4", peptides.get(1).getName()); |
178 | 1 | assertEquals("MLCF", peptides.get(1).getSequenceAsString()); |
179 | 1 | assertEquals("UNIPROT|P0CE20", peptides.get(2).getName()); |
180 | 1 | assertEquals("MLCF", peptides.get(2).getSequenceAsString()); |
181 | 1 | assertEquals("CAA30421.1", peptides.get(3).getName()); |
182 | 1 | assertEquals("MSSS", peptides.get(3).getSequenceAsString()); |
183 | 1 | assertEquals("UNIPROT|B0BCM3", peptides.get(4).getName()); |
184 | 1 | assertEquals("MSSS", peptides.get(4).getSequenceAsString()); |
185 | 1 | assertEquals("CAA12345.6", peptides.get(5).getName()); |
186 | 1 | assertEquals("MSS", peptides.get(5).getSequenceAsString()); |
187 | ||
188 | /* | |
189 | * verify dna sequence has dbrefs | |
190 | * - to 'self' (synthesized dbref) | |
191 | * - to EuropePMC | |
192 | * - to MD5 (with null version as "0") | |
193 | * - with CDS mappings to the peptide 'products' | |
194 | */ | |
195 | 1 | MapList mapToSelf = new MapList(new int[] { 1, 57 }, |
196 | new int[] | |
197 | { 1, 57 }, 1, 1); | |
198 | 1 | MapList cds1Map = new MapList(new int[] { 57, 46 }, new int[] { 1, 4 }, |
199 | 3, 1); | |
200 | 1 | MapList cds2Map = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, |
201 | 3, 1); | |
202 | 1 | MapList cds3Map = new MapList(new int[] { 4, 6, 10, 15 }, |
203 | new int[] | |
204 | { 1, 3 }, 3, 1); | |
205 | ||
206 | 1 | List<DBRefEntry> dbrefs = dna.getDBRefs(); |
207 | 1 | assertEquals(7, dbrefs.size()); |
208 | ||
209 | 1 | DBRefEntry dbRefEntry = dbrefs.get(0); |
210 | 1 | assertEquals("EMBL", dbRefEntry.getSource()); |
211 | 1 | assertEquals("X07547", dbRefEntry.getAccessionId()); |
212 | 1 | assertEquals("1", dbRefEntry.getVersion()); |
213 | 1 | assertNotNull(dbRefEntry.getMap()); |
214 | 1 | assertNull(dbRefEntry.getMap().getTo()); |
215 | 1 | assertEquals(mapToSelf, dbRefEntry.getMap().getMap()); |
216 | ||
217 | 1 | dbRefEntry = dbrefs.get(1); |
218 | // DBRefEntry constructor puts dbSource in upper case | |
219 | 1 | assertEquals("EUROPEPMC", dbRefEntry.getSource()); |
220 | 1 | assertEquals("PMC107176", dbRefEntry.getAccessionId()); |
221 | 1 | assertEquals("9573186", dbRefEntry.getVersion()); |
222 | 1 | assertNull(dbRefEntry.getMap()); |
223 | ||
224 | 1 | dbRefEntry = dbrefs.get(2); |
225 | 1 | assertEquals("MD5", dbRefEntry.getSource()); |
226 | 1 | assertEquals("ac73317", dbRefEntry.getAccessionId()); |
227 | 1 | assertEquals("0", dbRefEntry.getVersion()); |
228 | 1 | assertNull(dbRefEntry.getMap()); |
229 | ||
230 | 1 | dbRefEntry = dbrefs.get(3); |
231 | 1 | assertEquals("UNIPROT", dbRefEntry.getSource()); |
232 | 1 | assertEquals("B0BCM4", dbRefEntry.getAccessionId()); |
233 | 1 | assertSame(peptides.get(1), dbRefEntry.getMap().getTo()); |
234 | 1 | assertEquals(cds1Map, dbRefEntry.getMap().getMap()); |
235 | ||
236 | 1 | dbRefEntry = dbrefs.get(4); |
237 | 1 | assertEquals("UNIPROT", dbRefEntry.getSource()); |
238 | 1 | assertEquals("P0CE20", dbRefEntry.getAccessionId()); |
239 | 1 | assertSame(peptides.get(2), dbRefEntry.getMap().getTo()); |
240 | 1 | assertEquals(cds1Map, dbRefEntry.getMap().getMap()); |
241 | ||
242 | 1 | dbRefEntry = dbrefs.get(5); |
243 | 1 | assertEquals("UNIPROT", dbRefEntry.getSource()); |
244 | 1 | assertEquals("B0BCM3", dbRefEntry.getAccessionId()); |
245 | 1 | assertSame(peptides.get(4), dbRefEntry.getMap().getTo()); |
246 | 1 | assertEquals(cds2Map, dbRefEntry.getMap().getMap()); |
247 | ||
248 | 1 | dbRefEntry = dbrefs.get(6); |
249 | 1 | assertEquals("EMBLCDSPROTEIN", dbRefEntry.getSource()); |
250 | 1 | assertEquals("CAA12345.6", dbRefEntry.getAccessionId()); |
251 | 1 | assertSame(peptides.get(5), dbRefEntry.getMap().getTo()); |
252 | 1 | assertEquals(cds3Map, dbRefEntry.getMap().getMap()); |
253 | ||
254 | /* | |
255 | * verify peptides have dbrefs | |
256 | * - to EMBL sequence (with inverse 1:3 cds mapping) | |
257 | * - to EMBLCDS (with 1:3 mapping) | |
258 | * - direct (no mapping) to other protein accessions | |
259 | */ | |
260 | 1 | MapList proteinToCdsMap1 = new MapList(new int[] { 1, 4 }, |
261 | new int[] | |
262 | { 1, 12 }, 1, 3); | |
263 | 1 | MapList proteinToCdsMap2 = new MapList(new int[] { 1, 3 }, |
264 | new int[] | |
265 | { 1, 9 }, 1, 3); | |
266 | ||
267 | // dbrefs for first CDS EMBL product CAA30420.1 | |
268 | 1 | dbrefs = peptides.get(0).getDBRefs(); |
269 | 1 | assertEquals(5, dbrefs.size()); |
270 | 1 | assertEquals(DBRefSource.EMBL, dbrefs.get(0).getSource()); |
271 | 1 | assertEquals("CAA30420.1", dbrefs.get(0).getAccessionId()); |
272 | // TODO: verify getPrimaryDBRefs() for peptide products | |
273 | 1 | assertEquals(cds1Map.getInverse(), dbrefs.get(0).getMap().getMap()); |
274 | 1 | assertEquals(DBRefSource.EMBLCDS, dbrefs.get(1).getSource()); |
275 | 1 | assertEquals("CAA30420.1", dbrefs.get(1).getAccessionId()); |
276 | 1 | assertEquals(proteinToCdsMap1, dbrefs.get(1).getMap().getMap()); |
277 | 1 | assertEquals(DBRefSource.EMBLCDSProduct, dbrefs.get(2).getSource()); |
278 | 1 | assertEquals("CAA30420.1", dbrefs.get(2).getAccessionId()); |
279 | 1 | assertNull(dbrefs.get(2).getMap()); |
280 | 1 | assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "2.1", "B0BCM4"), |
281 | dbrefs.get(3)); | |
282 | 1 | assertNull(dbrefs.get(3).getMap()); |
283 | 1 | assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "P0CE20"), |
284 | dbrefs.get(4)); | |
285 | 1 | assertNull(dbrefs.get(4).getMap()); |
286 | ||
287 | // dbrefs for first CDS first Uniprot xref | |
288 | 1 | dbrefs = peptides.get(1).getDBRefs(); |
289 | 1 | assertEquals(2, dbrefs.size()); |
290 | 1 | assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "2.1", "B0BCM4"), |
291 | dbrefs.get(0)); | |
292 | 1 | assertNull(dbrefs.get(0).getMap()); |
293 | 1 | assertEquals(DBRefSource.EMBL, dbrefs.get(1).getSource()); |
294 | 1 | assertEquals("X07547", dbrefs.get(1).getAccessionId()); |
295 | 1 | assertEquals(cds1Map.getInverse(), dbrefs.get(1).getMap().getMap()); |
296 | ||
297 | // dbrefs for first CDS second Uniprot xref | |
298 | 1 | dbrefs = peptides.get(2).getDBRefs(); |
299 | 1 | assertEquals(2, dbrefs.size()); |
300 | 1 | assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "P0CE20"), |
301 | dbrefs.get(0)); | |
302 | 1 | assertNull(dbrefs.get(0).getMap()); |
303 | 1 | assertEquals(DBRefSource.EMBL, dbrefs.get(1).getSource()); |
304 | 1 | assertEquals("X07547", dbrefs.get(1).getAccessionId()); |
305 | 1 | assertEquals(cds1Map.getInverse(), dbrefs.get(1).getMap().getMap()); |
306 | ||
307 | // dbrefs for second CDS EMBL product CAA30421.1 | |
308 | 1 | dbrefs = peptides.get(3).getDBRefs(); |
309 | 1 | assertEquals(4, dbrefs.size()); |
310 | 1 | assertEquals(DBRefSource.EMBL, dbrefs.get(0).getSource()); |
311 | 1 | assertEquals("CAA30421.1", dbrefs.get(0).getAccessionId()); |
312 | 1 | assertEquals(cds2Map.getInverse(), dbrefs.get(0).getMap().getMap()); |
313 | 1 | assertEquals(DBRefSource.EMBLCDS, dbrefs.get(1).getSource()); |
314 | 1 | assertEquals("CAA30421.1", dbrefs.get(1).getAccessionId()); |
315 | 1 | assertEquals(proteinToCdsMap1, dbrefs.get(1).getMap().getMap()); |
316 | 1 | assertEquals(DBRefSource.EMBLCDSProduct, dbrefs.get(2).getSource()); |
317 | 1 | assertEquals("CAA30421.1", dbrefs.get(2).getAccessionId()); |
318 | 1 | assertNull(dbrefs.get(2).getMap()); |
319 | 1 | assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "B0BCM3"), |
320 | dbrefs.get(3)); | |
321 | 1 | assertNull(dbrefs.get(3).getMap()); |
322 | ||
323 | // dbrefs for second CDS second Uniprot xref | |
324 | 1 | dbrefs = peptides.get(4).getDBRefs(); |
325 | 1 | assertEquals(2, dbrefs.size()); |
326 | 1 | assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "B0BCM3"), |
327 | dbrefs.get(0)); | |
328 | 1 | assertNull(dbrefs.get(0).getMap()); |
329 | 1 | assertEquals(DBRefSource.EMBL, dbrefs.get(1).getSource()); |
330 | 1 | assertEquals("X07547", dbrefs.get(1).getAccessionId()); |
331 | 1 | assertEquals(cds2Map.getInverse(), dbrefs.get(1).getMap().getMap()); |
332 | ||
333 | // dbrefs for third CDS inferred EMBL product CAA12345.6 | |
334 | 1 | dbrefs = peptides.get(5).getDBRefs(); |
335 | 1 | assertEquals(3, dbrefs.size()); |
336 | 1 | assertEquals(DBRefSource.EMBL, dbrefs.get(0).getSource()); |
337 | 1 | assertEquals("CAA12345.6", dbrefs.get(0).getAccessionId()); |
338 | 1 | assertEquals(cds3Map.getInverse(), dbrefs.get(0).getMap().getMap()); |
339 | 1 | assertEquals(DBRefSource.EMBLCDS, dbrefs.get(1).getSource()); |
340 | 1 | assertEquals("CAA12345.6", dbrefs.get(1).getAccessionId()); |
341 | 1 | assertEquals(proteinToCdsMap2, dbrefs.get(1).getMap().getMap()); |
342 | 1 | assertEquals(DBRefSource.EMBLCDSProduct, dbrefs.get(2).getSource()); |
343 | 1 | assertEquals("CAA12345.6", dbrefs.get(2).getAccessionId()); |
344 | 1 | assertNull(dbrefs.get(2).getMap()); |
345 | } | |
346 | ||
347 | 1 | @Test(groups = "Functional") |
348 | public void testAdjustForProteinLength() | |
349 | { | |
350 | 1 | int[] exons = new int[] { 11, 15, 21, 25, 31, 38 }; // 18 bp |
351 | ||
352 | // exact length match: | |
353 | 1 | assertSame(exons, EmblXmlSource.adjustForProteinLength(6, exons)); |
354 | ||
355 | // truncate last exon by 3bp (e.g. stop codon) | |
356 | 1 | int[] truncated = EmblXmlSource.adjustForProteinLength(5, exons); |
357 | 1 | assertEquals("[11, 15, 21, 25, 31, 35]", Arrays.toString(truncated)); |
358 | ||
359 | // truncate last exon by 6bp | |
360 | 1 | truncated = EmblXmlSource.adjustForProteinLength(4, exons); |
361 | 1 | assertEquals("[11, 15, 21, 25, 31, 32]", Arrays.toString(truncated)); |
362 | ||
363 | // remove last exon and truncate preceding by 1bp | |
364 | 1 | truncated = EmblXmlSource.adjustForProteinLength(3, exons); |
365 | 1 | assertEquals("[11, 15, 21, 24]", Arrays.toString(truncated)); |
366 | ||
367 | // exact removal of exon case: | |
368 | 1 | exons = new int[] { 11, 15, 21, 27, 33, 38 }; // 18 bp |
369 | 1 | truncated = EmblXmlSource.adjustForProteinLength(4, exons); |
370 | 1 | assertEquals("[11, 15, 21, 27]", Arrays.toString(truncated)); |
371 | ||
372 | // what if exons are too short for protein? | |
373 | 1 | truncated = EmblXmlSource.adjustForProteinLength(7, exons); |
374 | 1 | assertSame(exons, truncated); |
375 | } | |
376 | ||
377 | 1 | @Test(groups = { "Functional" }) |
378 | public void testGetEmblEntries() | |
379 | { | |
380 | 1 | List<EntryType> entries = getEmblEntries(); |
381 | 1 | assertEquals(1, entries.size()); |
382 | 1 | EntryType entry = entries.get(0); |
383 | ||
384 | 1 | assertEquals("X07547", entry.getAccession()); |
385 | 1 | assertEquals("C. trachomatis plasmid", entry.getDescription()); |
386 | 1 | assertEquals("STD", entry.getDataClass()); |
387 | 1 | assertEquals("PRO", entry.getTaxonomicDivision()); |
388 | 1 | assertEquals("1999-02-10", entry.getLastUpdated().toString()); |
389 | 1 | assertEquals(58, entry.getLastUpdatedRelease().intValue()); |
390 | 1 | assertEquals("1988-11-10", entry.getFirstPublic().toString()); |
391 | 1 | assertEquals(18, entry.getFirstPublicRelease().intValue()); |
392 | 1 | assertEquals("genomic DNA", entry.getMoleculeType()); |
393 | 1 | assertEquals(1, entry.getVersion().intValue()); |
394 | 1 | assertEquals(8, entry.getEntryVersion().intValue()); |
395 | 1 | assertEquals("linear", entry.getTopology()); |
396 | 1 | assertEquals(7499, entry.getSequenceLength().intValue()); |
397 | 1 | assertEquals(2, entry.getKeyword().size()); |
398 | 1 | assertEquals("plasmid", entry.getKeyword().get(0)); |
399 | 1 | assertEquals("unidentified reading frame", entry.getKeyword().get(1)); |
400 | ||
401 | /* | |
402 | * dbrefs | |
403 | */ | |
404 | 1 | assertEquals(2, entry.getXref().size()); |
405 | 1 | XrefType dbref = entry.getXref().get(0); |
406 | 1 | assertEquals("EuropePMC", dbref.getDb()); |
407 | 1 | assertEquals("PMC107176", dbref.getId()); |
408 | 1 | assertEquals("9573186", dbref.getSecondaryId()); |
409 | 1 | dbref = entry.getXref().get(1); |
410 | 1 | assertEquals("MD5", dbref.getDb()); |
411 | 1 | assertEquals("ac73317", dbref.getId()); |
412 | 1 | assertNull(dbref.getSecondaryId()); |
413 | ||
414 | /* | |
415 | * three sequence features for CDS | |
416 | */ | |
417 | 1 | assertEquals(3, entry.getFeature().size()); |
418 | /* | |
419 | * first CDS | |
420 | */ | |
421 | 1 | Feature ef = entry.getFeature().get(0); |
422 | 1 | assertEquals("CDS", ef.getName()); |
423 | 1 | assertEquals("complement(46..57)", ef.getLocation()); |
424 | 1 | assertEquals(2, ef.getXref().size()); |
425 | 1 | dbref = ef.getXref().get(0); |
426 | 1 | assertEquals("UniProtKB/Swiss-Prot", dbref.getDb()); |
427 | 1 | assertEquals("B0BCM4", dbref.getId()); |
428 | 1 | assertEquals("2.1", dbref.getSecondaryId()); |
429 | 1 | dbref = ef.getXref().get(1); |
430 | 1 | assertEquals("UniProtKB/Swiss-Prot", dbref.getDb()); |
431 | 1 | assertEquals("P0CE20", dbref.getId()); |
432 | 1 | assertNull(dbref.getSecondaryId()); |
433 | // CDS feature qualifiers | |
434 | 1 | assertEquals(3, ef.getQualifier().size()); |
435 | 1 | Qualifier q = ef.getQualifier().get(0); |
436 | 1 | assertEquals("note", q.getName()); |
437 | 1 | assertEquals("ORF 8 (AA 1-330)", q.getValue()); |
438 | 1 | q = ef.getQualifier().get(1); |
439 | 1 | assertEquals("protein_id", q.getName()); |
440 | 1 | assertEquals("CAA30420.1", q.getValue()); |
441 | 1 | q = ef.getQualifier().get(2); |
442 | 1 | assertEquals("translation", q.getName()); |
443 | 1 | assertEquals("MLCF", q.getValue()); |
444 | ||
445 | /* | |
446 | * second CDS | |
447 | */ | |
448 | 1 | ef = entry.getFeature().get(1); |
449 | 1 | assertEquals("CDS", ef.getName()); |
450 | 1 | assertEquals("4..15", ef.getLocation()); |
451 | 1 | assertEquals(1, ef.getXref().size()); |
452 | 1 | dbref = ef.getXref().get(0); |
453 | 1 | assertEquals("UniProtKB/Swiss-Prot", dbref.getDb()); |
454 | 1 | assertEquals("B0BCM3", dbref.getId()); |
455 | 1 | assertNull(dbref.getSecondaryId()); |
456 | 1 | assertEquals(2, ef.getQualifier().size()); |
457 | 1 | q = ef.getQualifier().get(0); |
458 | 1 | assertEquals("protein_id", q.getName()); |
459 | 1 | assertEquals("CAA30421.1", q.getValue()); |
460 | 1 | q = ef.getQualifier().get(1); |
461 | 1 | assertEquals("translation", q.getName()); |
462 | 1 | assertEquals("MSSS", q.getValue()); |
463 | ||
464 | /* | |
465 | * third CDS | |
466 | */ | |
467 | 1 | ef = entry.getFeature().get(2); |
468 | 1 | assertEquals("CDS", ef.getName()); |
469 | 1 | assertEquals("join(4..6,10..15)", ef.getLocation()); |
470 | 1 | assertNotNull(ef.getXref()); |
471 | 1 | assertTrue(ef.getXref().isEmpty()); |
472 | 1 | assertEquals(2, ef.getQualifier().size()); |
473 | 1 | q = ef.getQualifier().get(0); |
474 | 1 | assertEquals("protein_id", q.getName()); |
475 | 1 | assertEquals("CAA12345.6", q.getValue()); |
476 | 1 | q = ef.getQualifier().get(1); |
477 | 1 | assertEquals("translation", q.getName()); |
478 | 1 | assertEquals("MSS", q.getValue()); |
479 | ||
480 | /* | |
481 | * Sequence - raw data before removal of newlines | |
482 | */ | |
483 | 1 | String seq = entry.getSequence(); |
484 | 1 | assertEquals("GGTATGTCCTCTAGTACAAAC\n" |
485 | + "ACCCCCAATATTGTGATATAATTAAAAACATAGCAT", seq); | |
486 | ||
487 | /* | |
488 | * getSequence() converts empty DBRefEntry.version to "0" | |
489 | */ | |
490 | 1 | assertNull(entry.getXref().get(1).getSecondaryId()); |
491 | 1 | assertNull(entry.getFeature().get(0).getXref().get(1).getSecondaryId()); |
492 | } | |
493 | ||
494 | 2 | List<EntryType> getEmblEntries() |
495 | { | |
496 | 2 | return testee |
497 | .getEmblEntries(new ByteArrayInputStream(TESTDATA.getBytes())); | |
498 | } | |
499 | } |