1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.io; |
22 |
|
|
23 |
|
import static org.testng.Assert.assertEquals; |
24 |
|
import static org.testng.Assert.assertTrue; |
25 |
|
import static org.testng.AssertJUnit.assertNotNull; |
26 |
|
import static org.testng.AssertJUnit.assertNull; |
27 |
|
import static org.testng.AssertJUnit.assertSame; |
28 |
|
import static org.testng.AssertJUnit.fail; |
29 |
|
|
30 |
|
import java.io.File; |
31 |
|
import java.io.IOException; |
32 |
|
import java.net.MalformedURLException; |
33 |
|
import java.util.Arrays; |
34 |
|
import java.util.List; |
35 |
|
import java.util.Set; |
36 |
|
|
37 |
|
import org.testng.annotations.BeforeClass; |
38 |
|
import org.testng.annotations.Test; |
39 |
|
|
40 |
|
import jalview.bin.Console; |
41 |
|
import jalview.datamodel.DBRefEntry; |
42 |
|
import jalview.datamodel.Mapping; |
43 |
|
import jalview.datamodel.Sequence.DBModList; |
44 |
|
import jalview.datamodel.SequenceFeature; |
45 |
|
import jalview.datamodel.SequenceI; |
46 |
|
import jalview.datamodel.features.SequenceFeatures; |
47 |
|
import jalview.util.MapList; |
48 |
|
|
|
|
| 0% |
Uncovered Elements: 199 (199) |
Complexity: 16 |
Complexity Density: 0.09 |
|
49 |
|
public class EmblFlatFileTest |
50 |
|
{ |
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
51 |
0 |
@BeforeClass(alwaysRun = true)... |
52 |
|
public void setUp() |
53 |
|
{ |
54 |
0 |
Console.initLogger(); |
55 |
|
} |
56 |
|
|
57 |
|
|
58 |
|
|
59 |
|
|
60 |
|
|
61 |
|
@throws |
62 |
|
@throws |
63 |
|
|
|
|
| 0% |
Uncovered Elements: 134 (134) |
Complexity: 10 |
Complexity Density: 0.08 |
4-
|
|
64 |
0 |
@Test(groups = "Functional")... |
65 |
|
public void testParse() throws MalformedURLException, IOException |
66 |
|
{ |
67 |
0 |
File dataFile = new File("test/jalview/io/J03321.embl.txt"); |
68 |
0 |
FileParse fp = new FileParse(dataFile, DataSourceType.FILE); |
69 |
0 |
EmblFlatFile parser = new EmblFlatFile(fp, "EmblTest"); |
70 |
0 |
List<SequenceI> seqs = parser.getSeqs(); |
71 |
|
|
72 |
0 |
assertEquals(seqs.size(), 1); |
73 |
0 |
SequenceI seq = seqs.get(0); |
74 |
0 |
assertEquals(seq.getName(), "EmblTest|J03321"); |
75 |
0 |
assertEquals(seq.getLength(), 7502); |
76 |
0 |
assertEquals(seq.getDescription(), |
77 |
|
"Chlamydia trachomatis plasmid pCHL1, complete sequence"); |
78 |
|
|
79 |
|
|
80 |
|
|
81 |
|
|
82 |
0 |
Set<String> featureTypes = seq.getFeatures().getFeatureTypes(); |
83 |
0 |
assertEquals(featureTypes.size(), 1); |
84 |
0 |
assertTrue(featureTypes.contains("CDS")); |
85 |
|
|
86 |
|
|
87 |
|
|
88 |
|
|
89 |
0 |
List<SequenceFeature> features = seq.getFeatures() |
90 |
|
.getAllFeatures("CDS"); |
91 |
0 |
SequenceFeatures.sortFeatures(features, true); |
92 |
0 |
assertEquals(features.size(), 9); |
93 |
|
|
94 |
0 |
SequenceFeature sf = features.get(0); |
95 |
0 |
assertEquals(sf.getBegin(), 1); |
96 |
0 |
assertEquals(sf.getEnd(), 437); |
97 |
0 |
assertEquals(sf.getDescription(), |
98 |
|
"Exon 2 for protein EMBLCDS:AAA91567.1"); |
99 |
0 |
assertEquals(sf.getFeatureGroup(), "EmblTest"); |
100 |
0 |
assertEquals(sf.getEnaLocation(), "join(7022..7502,1..437)"); |
101 |
0 |
assertEquals(sf.getPhase(), "0"); |
102 |
0 |
assertEquals(sf.getStrand(), 1); |
103 |
0 |
assertEquals(sf.getValue("note"), "pGP7-D"); |
104 |
|
|
105 |
0 |
assertEquals(sf.getValue("exon number"), 2); |
106 |
0 |
assertEquals(sf.getValue("product"), "hypothetical protein"); |
107 |
0 |
assertEquals(sf.getValue("transl_table"), "11"); |
108 |
|
|
109 |
0 |
sf = features.get(1); |
110 |
0 |
assertEquals(sf.getBegin(), 488); |
111 |
0 |
assertEquals(sf.getEnd(), 1480); |
112 |
0 |
assertEquals(sf.getDescription(), |
113 |
|
"Exon 1 for protein EMBLCDS:AAA91568.1"); |
114 |
0 |
assertEquals(sf.getFeatureGroup(), "EmblTest"); |
115 |
0 |
assertEquals(sf.getEnaLocation(), "complement(488..1480)"); |
116 |
0 |
assertEquals(sf.getPhase(), "0"); |
117 |
0 |
assertEquals(sf.getStrand(), -1); |
118 |
0 |
assertEquals(sf.getValue("note"), "pGP8-D"); |
119 |
0 |
assertEquals(sf.getValue("exon number"), 1); |
120 |
0 |
assertEquals(sf.getValue("product"), "hypothetical protein"); |
121 |
|
|
122 |
0 |
sf = features.get(7); |
123 |
0 |
assertEquals(sf.getBegin(), 6045); |
124 |
0 |
assertEquals(sf.getEnd(), 6788); |
125 |
0 |
assertEquals(sf.getDescription(), |
126 |
|
"Exon 1 for protein EMBLCDS:AAA91574.1"); |
127 |
0 |
assertEquals(sf.getFeatureGroup(), "EmblTest"); |
128 |
0 |
assertEquals(sf.getEnaLocation(), "6045..6788"); |
129 |
0 |
assertEquals(sf.getPhase(), "0"); |
130 |
0 |
assertEquals(sf.getStrand(), 1); |
131 |
0 |
assertEquals(sf.getValue("note"), "pGP6-D (gtg start codon)"); |
132 |
0 |
assertEquals(sf.getValue("exon number"), 1); |
133 |
0 |
assertEquals(sf.getValue("product"), "hypothetical protein"); |
134 |
|
|
135 |
|
|
136 |
|
|
137 |
|
|
138 |
0 |
sf = features.get(8); |
139 |
0 |
assertEquals(sf.getBegin(), 7022); |
140 |
0 |
assertEquals(sf.getEnd(), 7502); |
141 |
0 |
assertEquals(sf.getDescription(), |
142 |
|
"Exon 1 for protein EMBLCDS:AAA91567.1"); |
143 |
0 |
assertEquals(sf.getFeatureGroup(), "EmblTest"); |
144 |
0 |
assertEquals(sf.getEnaLocation(), "join(7022..7502,1..437)"); |
145 |
0 |
assertEquals(sf.getPhase(), "0"); |
146 |
0 |
assertEquals(sf.getStrand(), 1); |
147 |
0 |
assertEquals(sf.getValue("note"), "pGP7-D"); |
148 |
0 |
assertEquals(sf.getValue("exon number"), 1); |
149 |
0 |
assertEquals(sf.getValue("product"), "hypothetical protein"); |
150 |
|
|
151 |
|
|
152 |
|
|
153 |
|
|
154 |
|
|
155 |
|
|
156 |
|
|
157 |
0 |
List<DBRefEntry> dbrefs = seq.getDBRefs(); |
158 |
0 |
assertEquals(dbrefs.size(), 32); |
159 |
|
|
160 |
0 |
DBRefEntry selfRef = new DBRefEntry("EMBLTEST", "1", "J03321"); |
161 |
0 |
int[] range = new int[] { 1, seq.getLength() }; |
162 |
0 |
selfRef.setMap(new Mapping(null, range, range, 1, 1)); |
163 |
0 |
assertTrue(dbrefs.contains(selfRef)); |
164 |
|
|
165 |
|
|
166 |
0 |
assertTrue(dbrefs.contains(new DBRefEntry("MD5", "0", |
167 |
|
"d4c4942a634e3df4995fd5ac75c26a61"))); |
168 |
|
|
169 |
0 |
assertTrue( |
170 |
|
dbrefs.contains(new DBRefEntry("EUROPEPMC", "0", "PMC87941"))); |
171 |
|
|
172 |
0 |
assertTrue(dbrefs.contains(new DBRefEntry("GOA", "0", "P0CE19"))); |
173 |
|
|
174 |
0 |
assertTrue( |
175 |
|
dbrefs.contains(new DBRefEntry("INTERPRO", "0", "IPR005350"))); |
176 |
|
|
177 |
|
|
178 |
|
|
179 |
|
|
180 |
0 |
int uniprotCount = 0; |
181 |
0 |
List<int[]> ranges; |
182 |
0 |
for (DBRefEntry dbref : dbrefs) |
183 |
|
{ |
184 |
0 |
if ("UNIPROT".equals(dbref.getSource())) |
185 |
|
{ |
186 |
0 |
uniprotCount++; |
187 |
0 |
Mapping mapping = dbref.getMap(); |
188 |
0 |
assertNotNull(mapping); |
189 |
0 |
MapList map = mapping.getMap(); |
190 |
0 |
String mappedToName = mapping.getTo().getName(); |
191 |
0 |
if ("UNIPROT|P0CE16".equals(mappedToName)) |
192 |
|
{ |
193 |
0 |
assertEquals((ranges = map.getFromRanges()).size(), 1); |
194 |
0 |
assertEquals(ranges.get(0)[0], 1579); |
195 |
0 |
assertEquals(ranges.get(0)[1], 2931); |
196 |
0 |
assertEquals((ranges = map.getToRanges()).size(), 1); |
197 |
0 |
assertEquals(ranges.get(0)[0], 1); |
198 |
0 |
assertEquals(ranges.get(0)[1], 451); |
199 |
|
|
200 |
0 |
assertEquals(mapping.getTo().getDescription(), |
201 |
|
"hypothetical protein"); |
202 |
|
} |
203 |
0 |
else if ("UNIPROT|P0CE17".equals(mappedToName)) |
204 |
|
{ |
205 |
0 |
assertEquals((ranges = map.getFromRanges()).size(), 1); |
206 |
0 |
assertEquals(ranges.get(0)[0], 2928); |
207 |
0 |
assertEquals(ranges.get(0)[1], 3989); |
208 |
0 |
assertEquals((ranges = map.getToRanges()).size(), 1); |
209 |
0 |
assertEquals(ranges.get(0)[0], 1); |
210 |
0 |
assertEquals(ranges.get(0)[1], 354); |
211 |
|
} |
212 |
0 |
else if ("UNIPROT|P0CE18".equals(mappedToName)) |
213 |
|
{ |
214 |
0 |
assertEquals((ranges = map.getFromRanges()).size(), 1); |
215 |
0 |
assertEquals(ranges.get(0)[0], 4054); |
216 |
0 |
assertEquals(ranges.get(0)[1], 4845); |
217 |
0 |
assertEquals((ranges = map.getToRanges()).size(), 1); |
218 |
0 |
assertEquals(ranges.get(0)[0], 1); |
219 |
0 |
assertEquals(ranges.get(0)[1], 264); |
220 |
|
} |
221 |
0 |
else if ("UNIPROT|P0CE19".equals(mappedToName)) |
222 |
|
{ |
223 |
|
|
224 |
0 |
assertEquals((ranges = map.getFromRanges()).size(), 2); |
225 |
0 |
assertEquals(ranges.get(0)[0], 7022); |
226 |
0 |
assertEquals(ranges.get(0)[1], 7502); |
227 |
0 |
assertEquals(ranges.get(1)[0], 1); |
228 |
0 |
assertEquals(ranges.get(1)[1], 434); |
229 |
0 |
assertEquals((ranges = map.getToRanges()).size(), 1); |
230 |
0 |
assertEquals(ranges.get(0)[0], 1); |
231 |
0 |
assertEquals(ranges.get(0)[1], 305); |
232 |
|
} |
233 |
0 |
else if ("UNIPROT|P0CE20".equals(mappedToName)) |
234 |
|
{ |
235 |
|
|
236 |
0 |
assertEquals((ranges = map.getFromRanges()).size(), 1); |
237 |
0 |
assertEquals(ranges.get(0)[0], 1480); |
238 |
0 |
assertEquals(ranges.get(0)[1], 491); |
239 |
0 |
assertEquals((ranges = map.getToRanges()).size(), 1); |
240 |
0 |
assertEquals(ranges.get(0)[0], 1); |
241 |
0 |
assertEquals(ranges.get(0)[1], 330); |
242 |
|
} |
243 |
0 |
else if (!"UNIPROT|P0CE23".equals(mappedToName) |
244 |
|
&& !"UNIPROT|P10559".equals(mappedToName) |
245 |
|
&& !"UNIPROT|P10560".equals(mappedToName)) |
246 |
|
{ |
247 |
0 |
fail("Unexpected UNIPROT dbref to " + mappedToName); |
248 |
|
} |
249 |
|
} |
250 |
|
} |
251 |
0 |
assertEquals(uniprotCount, 8); |
252 |
|
} |
253 |
|
|
254 |
|
|
255 |
|
|
256 |
|
|
257 |
|
|
258 |
|
@throws |
259 |
|
@throws |
260 |
|
|
|
|
| 0% |
Uncovered Elements: 5 (5) |
Complexity: 1 |
Complexity Density: 0.2 |
4-
|
|
261 |
0 |
@Test(groups = "Functional")... |
262 |
|
public void testParseToRNA() throws MalformedURLException, IOException |
263 |
|
{ |
264 |
0 |
File dataFile = new File("test/jalview/io/J03321_rna.embl.txt"); |
265 |
0 |
FileParse fp = new FileParse(dataFile, DataSourceType.FILE); |
266 |
0 |
EmblFlatFile parser = new EmblFlatFile(fp, "EmblTest"); |
267 |
0 |
List<SequenceI> seqs = parser.getSeqs(); |
268 |
0 |
assertTrue(seqs.get(0).getSequenceAsString().indexOf("u") > -1); |
269 |
|
} |
270 |
|
|
|
|
| - |
Uncovered Elements: 0 (0) |
Complexity: 1 |
Complexity Density: - |
4-
|
|
271 |
0 |
@Test(groups = "Functional")... |
272 |
|
public void testParse_codonStartNot1() |
273 |
|
{ |
274 |
|
|
275 |
|
|
276 |
|
} |
277 |
|
|
278 |
|
|
279 |
|
|
280 |
|
|
281 |
|
|
282 |
|
|
283 |
|
@throws |
284 |
|
|
|
|
| 0% |
Uncovered Elements: 35 (35) |
Complexity: 1 |
Complexity Density: 0.03 |
4-
|
|
285 |
0 |
@Test(groups = "Functional")... |
286 |
|
public void testParse_noUniprotXref() throws IOException |
287 |
|
{ |
288 |
|
|
289 |
|
|
290 |
|
|
291 |
0 |
String data = "ID MN908947; SV 3; linear; genomic RNA; STD; VRL; 20 BP.\n" |
292 |
|
+ "DE Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1,\n" |
293 |
|
+ "FT CDS 3..17\n" |
294 |
|
+ "FT /protein_id=\"QHD43415.1\"\n" |
295 |
|
+ "FT /product=\"orf1ab polyprotein\n" |
296 |
|
+ "FT \"\"foobar\"\" \"\n" |
297 |
|
+ "FT /translation=\"MRKLD\n" |
298 |
|
+ "SQ Sequence 7496 BP; 2450 A; 1290 C; 1434 G; 2322 T; 0 other;\n" |
299 |
|
+ " ggatGcgtaa gttagacgaa attttgtctt tgcgcacaga 40\n"; |
300 |
0 |
FileParse fp = new FileParse(data, DataSourceType.PASTE); |
301 |
0 |
EmblFlatFile parser = new EmblFlatFile(fp, "EmblTest"); |
302 |
0 |
List<SequenceI> seqs = parser.getSeqs(); |
303 |
0 |
assertEquals(seqs.size(), 1); |
304 |
0 |
SequenceI seq = seqs.get(0); |
305 |
0 |
DBModList<DBRefEntry> dbrefs = seq.getDBRefs(); |
306 |
|
|
307 |
|
|
308 |
|
|
309 |
|
|
310 |
0 |
assertEquals(dbrefs.size(), 2); |
311 |
|
|
312 |
|
|
313 |
0 |
DBRefEntry dbref = dbrefs.get(0); |
314 |
0 |
assertEquals(dbref.getSource(), "EMBLTEST"); |
315 |
0 |
assertEquals(dbref.getAccessionId(), "MN908947"); |
316 |
0 |
Mapping mapping = dbref.getMap(); |
317 |
0 |
assertNull(mapping.getTo()); |
318 |
0 |
MapList map = mapping.getMap(); |
319 |
0 |
assertEquals(map.getFromLowest(), 1); |
320 |
0 |
assertEquals(map.getFromHighest(), 40); |
321 |
0 |
assertEquals(map.getToLowest(), 1); |
322 |
0 |
assertEquals(map.getToHighest(), 40); |
323 |
0 |
assertEquals(map.getFromRatio(), 1); |
324 |
0 |
assertEquals(map.getToRatio(), 1); |
325 |
|
|
326 |
|
|
327 |
0 |
dbref = dbrefs.get(1); |
328 |
0 |
assertEquals(dbref.getSource(), "EMBLCDSPROTEIN"); |
329 |
0 |
assertEquals(dbref.getAccessionId(), "QHD43415.1"); |
330 |
0 |
mapping = dbref.getMap(); |
331 |
0 |
SequenceI mapTo = mapping.getTo(); |
332 |
0 |
assertEquals(mapTo.getName(), "QHD43415.1"); |
333 |
|
|
334 |
0 |
assertEquals(mapTo.getDescription(), "orf1ab polyprotein \"foobar\""); |
335 |
0 |
assertEquals(mapTo.getSequenceAsString(), "MRKLD"); |
336 |
0 |
map = mapping.getMap(); |
337 |
0 |
assertEquals(map.getFromLowest(), 3); |
338 |
0 |
assertEquals(map.getFromHighest(), 17); |
339 |
0 |
assertEquals(map.getToLowest(), 1); |
340 |
0 |
assertEquals(map.getToHighest(), 5); |
341 |
0 |
assertEquals(map.getFromRatio(), 3); |
342 |
0 |
assertEquals(map.getToRatio(), 1); |
343 |
|
} |
344 |
|
|
|
|
| 0% |
Uncovered Elements: 13 (13) |
Complexity: 1 |
Complexity Density: 0.08 |
4-
|
|
345 |
0 |
@Test(groups = "Functional")... |
346 |
|
public void testAdjustForProteinLength() |
347 |
|
{ |
348 |
0 |
int[] exons = new int[] { 11, 15, 21, 25, 31, 38 }; |
349 |
|
|
350 |
|
|
351 |
0 |
assertSame(exons, EmblFlatFile.adjustForProteinLength(6, exons)); |
352 |
|
|
353 |
|
|
354 |
|
|
355 |
0 |
int[] truncated = EmblFlatFile.adjustForProteinLength(5, exons); |
356 |
0 |
assertEquals(Arrays.toString(truncated), "[11, 15, 21, 25, 31, 35]"); |
357 |
|
|
358 |
|
|
359 |
0 |
truncated = EmblFlatFile.adjustForProteinLength(4, exons); |
360 |
0 |
assertEquals(Arrays.toString(truncated), "[11, 15, 21, 25, 31, 32]"); |
361 |
|
|
362 |
|
|
363 |
0 |
truncated = EmblFlatFile.adjustForProteinLength(3, exons); |
364 |
0 |
assertEquals(Arrays.toString(truncated), "[11, 15, 21, 24]"); |
365 |
|
|
366 |
|
|
367 |
0 |
exons = new int[] { 11, 15, 21, 27, 33, 38 }; |
368 |
0 |
truncated = EmblFlatFile.adjustForProteinLength(4, exons); |
369 |
0 |
assertEquals(Arrays.toString(truncated), "[11, 15, 21, 27]"); |
370 |
|
|
371 |
|
|
372 |
0 |
truncated = EmblFlatFile.adjustForProteinLength(7, exons); |
373 |
0 |
assertSame(exons, truncated); |
374 |
|
} |
375 |
|
|
|
|
| 0% |
Uncovered Elements: 4 (4) |
Complexity: 1 |
Complexity Density: 0.25 |
4-
|
|
376 |
0 |
@Test(groups = "Functional")... |
377 |
|
public void testRemoveQuotes() |
378 |
|
{ |
379 |
0 |
assertNull(EmblFlatFile.removeQuotes(null)); |
380 |
0 |
assertEquals(EmblFlatFile.removeQuotes("No quotes here"), |
381 |
|
"No quotes here"); |
382 |
0 |
assertEquals(EmblFlatFile.removeQuotes("\"Enclosing quotes\""), |
383 |
|
"Enclosing quotes"); |
384 |
0 |
assertEquals( |
385 |
|
EmblFlatFile.removeQuotes("\"Escaped \"\"quotes\"\" example\""), |
386 |
|
"Escaped \"quotes\" example"); |
387 |
|
} |
388 |
|
} |