Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
UniprotTest | 52 | 159 | 9 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.ws.dbsources; | |
22 | ||
23 | import static org.testng.Assert.assertFalse; | |
24 | import static org.testng.AssertJUnit.assertEquals; | |
25 | import static org.testng.AssertJUnit.assertNotNull; | |
26 | import static org.testng.AssertJUnit.assertNull; | |
27 | import static org.testng.AssertJUnit.assertTrue; | |
28 | ||
29 | import java.io.ByteArrayInputStream; | |
30 | import java.io.InputStream; | |
31 | import java.io.UnsupportedEncodingException; | |
32 | import java.math.BigInteger; | |
33 | import java.util.List; | |
34 | ||
35 | import org.testng.Assert; | |
36 | import org.testng.annotations.BeforeClass; | |
37 | import org.testng.annotations.DataProvider; | |
38 | import org.testng.annotations.Test; | |
39 | ||
40 | import jalview.datamodel.DBRefEntry; | |
41 | import jalview.datamodel.DBRefSource; | |
42 | import jalview.datamodel.SequenceFeature; | |
43 | import jalview.datamodel.SequenceI; | |
44 | import jalview.gui.JvOptionPane; | |
45 | import jalview.util.DBRefUtils; | |
46 | import jalview.xml.binding.uniprot.DbReferenceType; | |
47 | import jalview.xml.binding.uniprot.Entry; | |
48 | import jalview.xml.binding.uniprot.FeatureType; | |
49 | import jalview.xml.binding.uniprot.LocationType; | |
50 | import jalview.xml.binding.uniprot.PositionType; | |
51 | ||
52 | public class UniprotTest | |
53 | { | |
54 | ||
55 | 1 | @BeforeClass(alwaysRun = true) |
56 | public void setUpJvOptionPane() | |
57 | { | |
58 | 1 | JvOptionPane.setInteractiveMode(false); |
59 | 1 | JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION); |
60 | } | |
61 | ||
62 | // adapted from http://www.uniprot.org/uniprot/A9CKP4.xml | |
63 | private static final String UNIPROT_XML = "<?xml version='1.0' encoding='UTF-8'?>" | |
64 | + "<uniprot xmlns=\"http://uniprot.org/uniprot\">" | |
65 | + "<entry dataset=\"TrEMBL\" created=\"2008-01-15\" modified=\"2015-03-04\" version=\"38\">" | |
66 | + "<accession>A9CKP4</accession>" | |
67 | + "<accession>A9CKP5</accession>" + "<name>A9CKP4_AGRT5</name>" | |
68 | + "<name>A9CKP4_AGRT6</name>" | |
69 | + "<protein><recommendedName><fullName>Mitogen-activated protein kinase 13</fullName></recommendedName></protein>" | |
70 | + "<dbReference type=\"PDB\" id=\"2FSQ\"><property type=\"method\" value=\"X-ray\"/><property type=\"resolution\" value=\"1.40\"/></dbReference>" | |
71 | + "<dbReference type=\"PDBsum\" id=\"2FSR\"/>" | |
72 | + "<dbReference type=\"EMBL\" id=\"AE007869\"><property type=\"protein sequence ID\" value=\"AAK85932.1\"/><property type=\"molecule type\" value=\"Genomic_DNA\"/></dbReference>" | |
73 | + "<feature type=\"signal peptide\" evidence=\"7\"><location><begin position=\"1\"/><end position=\"18\"/></location></feature>" | |
74 | + "<feature type=\"propeptide\" description=\"Activation peptide\" id=\"PRO_0000027399\" evidence=\"9 16 17 18\"><location><begin position=\"19\"/><end position=\"20\"/></location></feature>" | |
75 | + "<feature type=\"chain\" description=\"Granzyme B\" id=\"PRO_0000027400\"><location><begin position=\"21\"/><end position=\"247\"/></location></feature>" | |
76 | + "<feature type=\"sequence variant\"><original>M</original><variation>L</variation><location><position position=\"41\"/></location></feature>" | |
77 | + "<feature type=\"sequence variant\" description=\"Pathogenic\"><original>M</original><variation>L</variation><location><position position=\"41\"/></location></feature>" | |
78 | + "<feature type=\"sequence variant\" description=\"Pathogenic\"><original>M</original><location><position position=\"41\"/></location></feature>" | |
79 | + "<feature type=\"sequence variant\" description=\"Foo\"><variation>L</variation><variation>LMV</variation><original>M</original><location><position position=\"42\"/></location></feature>" | |
80 | + "<feature type=\"sequence variant\" description=\"Foo\"><variation>LL</variation><variation>LMV</variation><original>ML</original><location><begin position=\"42\"/><end position=\"43\"/></location></feature>" | |
81 | + "<feature type=\"sequence variant\" description=\"Foo Too\"><variation>LL</variation><variation>LMVK</variation><original>MLML</original><location><begin position=\"42\"/><end position=\"45\"/></location></feature>" | |
82 | + "<sequence length=\"10\" mass=\"27410\" checksum=\"8CB760AACF88FE6C\" modified=\"2008-01-15\" version=\"1\">MHAPL VSKDL</sequence></entry>" | |
83 | + "</uniprot>"; | |
84 | ||
85 | /** | |
86 | * Test the method that unmarshals XML to a Uniprot model | |
87 | * | |
88 | * @throws UnsupportedEncodingException | |
89 | */ | |
90 | 1 | @Test(groups = { "Functional" }) |
91 | public void testGetUniprotEntries() throws UnsupportedEncodingException | |
92 | { | |
93 | 1 | Uniprot u = new Uniprot(); |
94 | 1 | InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes()); |
95 | 1 | List<Entry> entries = u.getUniprotEntries(is); |
96 | 1 | assertEquals(1, entries.size()); |
97 | 1 | Entry entry = entries.get(0); |
98 | 1 | assertEquals(2, entry.getName().size()); |
99 | 1 | assertEquals("A9CKP4_AGRT5", entry.getName().get(0)); |
100 | 1 | assertEquals("A9CKP4_AGRT6", entry.getName().get(1)); |
101 | 1 | assertEquals(2, entry.getAccession().size()); |
102 | 1 | assertEquals("A9CKP4", entry.getAccession().get(0)); |
103 | 1 | assertEquals("A9CKP5", entry.getAccession().get(1)); |
104 | ||
105 | 1 | assertEquals("MHAPL VSKDL", entry.getSequence().getValue()); |
106 | ||
107 | 1 | assertEquals("Mitogen-activated protein kinase 13", entry.getProtein() |
108 | .getRecommendedName().getFullName().getValue()); | |
109 | ||
110 | /* | |
111 | * Check sequence features | |
112 | */ | |
113 | 1 | List<FeatureType> features = entry.getFeature(); |
114 | 1 | assertEquals(9, features.size()); |
115 | 1 | FeatureType sf = features.get(0); |
116 | 1 | assertEquals("signal peptide", sf.getType()); |
117 | 1 | assertNull(sf.getDescription()); |
118 | 1 | assertNull(sf.getStatus()); |
119 | 1 | assertNull(sf.getLocation().getPosition()); |
120 | 1 | assertEquals(1, sf.getLocation().getBegin().getPosition().intValue()); |
121 | 1 | assertEquals(18, sf.getLocation().getEnd().getPosition().intValue()); |
122 | 1 | sf = features.get(1); |
123 | 1 | assertEquals("propeptide", sf.getType()); |
124 | 1 | assertEquals("Activation peptide", sf.getDescription()); |
125 | 1 | assertNull(sf.getLocation().getPosition()); |
126 | 1 | assertEquals(19, sf.getLocation().getBegin().getPosition().intValue()); |
127 | 1 | assertEquals(20, sf.getLocation().getEnd().getPosition().intValue()); |
128 | 1 | sf = features.get(2); |
129 | 1 | assertEquals("chain", sf.getType()); |
130 | 1 | assertEquals("Granzyme B", sf.getDescription()); |
131 | 1 | assertNull(sf.getLocation().getPosition()); |
132 | 1 | assertEquals(21, sf.getLocation().getBegin().getPosition().intValue()); |
133 | 1 | assertEquals(247, sf.getLocation().getEnd().getPosition().intValue()); |
134 | ||
135 | 1 | sf = features.get(3); |
136 | 1 | assertEquals("sequence variant", sf.getType()); |
137 | 1 | assertNull(sf.getDescription()); |
138 | 1 | assertEquals(41, |
139 | sf.getLocation().getPosition().getPosition().intValue()); | |
140 | 1 | assertNull(sf.getLocation().getBegin()); |
141 | 1 | assertNull(sf.getLocation().getEnd()); |
142 | ||
143 | 1 | sf = features.get(4); |
144 | 1 | assertEquals("sequence variant", sf.getType()); |
145 | 1 | assertEquals("Pathogenic", sf.getDescription()); |
146 | 1 | assertEquals(41, |
147 | sf.getLocation().getPosition().getPosition().intValue()); | |
148 | 1 | assertNull(sf.getLocation().getBegin()); |
149 | 1 | assertNull(sf.getLocation().getEnd()); |
150 | ||
151 | 1 | sf = features.get(5); |
152 | 1 | assertEquals("sequence variant", sf.getType()); |
153 | 1 | assertEquals("Pathogenic", sf.getDescription()); |
154 | 1 | assertEquals(41, |
155 | sf.getLocation().getPosition().getPosition().intValue()); | |
156 | 1 | assertNull(sf.getLocation().getBegin()); |
157 | 1 | assertNull(sf.getLocation().getEnd()); |
158 | ||
159 | 1 | sf = features.get(6); |
160 | 1 | assertEquals("sequence variant", sf.getType()); |
161 | 1 | assertEquals("Foo", sf.getDescription()); |
162 | 1 | assertEquals(42, |
163 | sf.getLocation().getPosition().getPosition().intValue()); | |
164 | 1 | assertNull(sf.getLocation().getBegin()); |
165 | 1 | assertNull(sf.getLocation().getEnd()); |
166 | 1 | Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.Met42Leu" |
167 | + "<br/> " + "p.Met42LeuMetVal Foo</html>"); | |
168 | ||
169 | 1 | sf = features.get(7); |
170 | 1 | assertNull(sf.getLocation().getPosition()); |
171 | 1 | assertEquals(42, sf.getLocation().getBegin().getPosition().intValue()); |
172 | 1 | assertEquals(43, sf.getLocation().getEnd().getPosition().intValue()); |
173 | 1 | Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.MetLeu42LeuLeu" |
174 | + "<br/> " + "p.MetLeu42LeuMetVal Foo</html>"); | |
175 | ||
176 | 1 | sf = features.get(8); |
177 | 1 | assertNull(sf.getLocation().getPosition()); |
178 | 1 | assertEquals(42, sf.getLocation().getBegin().getPosition().intValue()); |
179 | 1 | assertEquals(45, sf.getLocation().getEnd().getPosition().intValue()); |
180 | 1 | Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.MLML42LeuLeu" |
181 | + "<br/> " + "p.MLML42LMVK Foo Too</html>"); | |
182 | ||
183 | /* | |
184 | * Check cross-references | |
185 | */ | |
186 | 1 | List<DbReferenceType> xrefs = entry.getDbReference(); |
187 | 1 | assertEquals(3, xrefs.size()); |
188 | ||
189 | 1 | DbReferenceType xref = xrefs.get(0); |
190 | 1 | assertEquals("2FSQ", xref.getId()); |
191 | 1 | assertEquals("PDB", xref.getType()); |
192 | 1 | assertEquals("X-ray", |
193 | Uniprot.getProperty(xref.getProperty(), "method")); | |
194 | 1 | assertEquals("1.40", |
195 | Uniprot.getProperty(xref.getProperty(), "resolution")); | |
196 | ||
197 | 1 | xref = xrefs.get(1); |
198 | 1 | assertEquals("2FSR", xref.getId()); |
199 | 1 | assertEquals("PDBsum", xref.getType()); |
200 | 1 | assertTrue(xref.getProperty().isEmpty()); |
201 | ||
202 | 1 | xref = xrefs.get(2); |
203 | 1 | assertEquals("AE007869", xref.getId()); |
204 | 1 | assertEquals("EMBL", xref.getType()); |
205 | 1 | assertEquals("AAK85932.1", |
206 | Uniprot.getProperty(xref.getProperty(), "protein sequence ID")); | |
207 | 1 | assertEquals("Genomic_DNA", |
208 | Uniprot.getProperty(xref.getProperty(), "molecule type")); | |
209 | } | |
210 | ||
211 | 1 | @Test(groups = { "Functional" }) |
212 | public void testGetUniprotSequence() throws UnsupportedEncodingException | |
213 | { | |
214 | 1 | InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes()); |
215 | 1 | Entry entry = new Uniprot().getUniprotEntries(is).get(0); |
216 | 1 | SequenceI seq = new Uniprot().uniprotEntryToSequence(entry); |
217 | 1 | assertNotNull(seq); |
218 | 1 | assertEquals(6, seq.getDBRefs().size()); // 2*Uniprot, PDB, PDBsum, 2*EMBL |
219 | 1 | assertEquals(seq.getSequenceAsString(), |
220 | seq.createDatasetSequence().getSequenceAsString()); | |
221 | 1 | assertEquals(2, seq.getPrimaryDBRefs().size()); |
222 | 1 | List<DBRefEntry> res = DBRefUtils.searchRefs(seq.getPrimaryDBRefs(), |
223 | "A9CKP4"); | |
224 | 1 | assertEquals(1, res.size()); |
225 | 1 | assertTrue(res.get(0).isCanonical()); |
226 | 1 | res = DBRefUtils.searchRefsForSource(seq.getDBRefs(), |
227 | DBRefSource.UNIPROT); | |
228 | 1 | assertEquals(2, res.size()); |
229 | /* | |
230 | * NB this test fragile - relies on ordering being preserved | |
231 | */ | |
232 | 1 | assertTrue(res.get(0).isCanonical()); |
233 | 1 | assertFalse(res.get(1).isCanonical()); |
234 | ||
235 | // check version is preserved for EMBLCDS | |
236 | 1 | res = DBRefUtils.searchRefs(seq.getDBRefs(), "AAK85932"); |
237 | 1 | assertEquals(1, res.size()); |
238 | // Ideally we would expect AAK85932.1 -> AAK85932 | |
239 | // assertTrue("1".equals(res.get(0).getVersion())); | |
240 | // but it also passes through DBrefUtils.ensurePrimaries which adds | |
241 | // (promoted) to the version string | |
242 | // FIXME: Jim needs to specify what (promoted) means !! - or perhaps we just | |
243 | // ignore it ! | |
244 | 1 | assertEquals("1 (promoted)", (res.get(0).getVersion())); |
245 | ||
246 | 1 | List<SequenceFeature> features = seq.getFeatures().findFeatures(41, 41, |
247 | "sequence variant"); | |
248 | // verify single position features are parsed correctly JAL-4347 | |
249 | 1 | assertNotNull(features); |
250 | 1 | assertEquals(3, features.size()); |
251 | ||
252 | } | |
253 | ||
254 | /** | |
255 | * Test the method that formats the sequence id | |
256 | * | |
257 | * @throws UnsupportedEncodingException | |
258 | */ | |
259 | 1 | @Test(groups = { "Functional" }) |
260 | public void testGetUniprotEntryId() throws UnsupportedEncodingException | |
261 | { | |
262 | 1 | InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes()); |
263 | 1 | Entry entry = new Uniprot().getUniprotEntries(is).get(0); |
264 | ||
265 | /* | |
266 | * name formatted with Uniprot Entry name | |
267 | */ | |
268 | 1 | String expectedName = "A9CKP4_AGRT5|A9CKP4_AGRT6"; |
269 | 1 | assertEquals(expectedName, Uniprot.getUniprotEntryId(entry)); |
270 | } | |
271 | ||
272 | /** | |
273 | * Test the method that formats the sequence description | |
274 | * | |
275 | * @throws UnsupportedEncodingException | |
276 | */ | |
277 | 1 | @Test(groups = { "Functional" }) |
278 | public void testGetUniprotEntryDescription() | |
279 | throws UnsupportedEncodingException | |
280 | { | |
281 | 1 | InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes()); |
282 | 1 | Entry entry = new Uniprot().getUniprotEntries(is).get(0); |
283 | ||
284 | 1 | assertEquals("Mitogen-activated protein kinase 13", |
285 | Uniprot.getUniprotEntryDescription(entry)); | |
286 | } | |
287 | ||
288 | 1 | @Test(groups = { "Functional" }) |
289 | public void testGetDescription() | |
290 | { | |
291 | 1 | FeatureType ft = new FeatureType(); |
292 | 1 | assertEquals("", Uniprot.getDescription(ft)); |
293 | ||
294 | 1 | ft.setDescription("Hello"); |
295 | 1 | assertEquals("Hello", Uniprot.getDescription(ft)); |
296 | ||
297 | 1 | ft.setLocation(new LocationType()); |
298 | 1 | ft.getLocation().setPosition(new PositionType()); |
299 | 1 | ft.getLocation().getPosition().setPosition(BigInteger.valueOf(23)); |
300 | 1 | ft.setOriginal("K"); |
301 | 1 | ft.getVariation().add("y"); |
302 | 1 | assertEquals("p.Lys23Tyr Hello", Uniprot.getDescription(ft)); |
303 | ||
304 | // multiple variants generate an html description over more than one line | |
305 | 1 | ft.getVariation().add("W"); |
306 | 1 | assertEquals("<html>p.Lys23Tyr<br/> p.Lys23Trp Hello</html>", |
307 | Uniprot.getDescription(ft)); | |
308 | ||
309 | /* | |
310 | * indel cases | |
311 | * up to 3 bases (original or variant) are shown using 3 letter code | |
312 | */ | |
313 | 1 | ft.getVariation().clear(); |
314 | 1 | ft.getVariation().add("KWE"); |
315 | 1 | ft.setOriginal("KLS"); |
316 | 1 | assertEquals("p.LysLeuSer23LysTrpGlu Hello", |
317 | Uniprot.getDescription(ft)); | |
318 | ||
319 | // adding a fourth original base switches to single letter code | |
320 | 1 | ft.setOriginal("KLST"); |
321 | 1 | assertEquals("p.KLST23LysTrpGlu Hello", Uniprot.getDescription(ft)); |
322 | ||
323 | // adding a fourth variant switches to single letter code | |
324 | 1 | ft.getVariation().clear(); |
325 | 1 | ft.getVariation().add("KWES"); |
326 | 1 | assertEquals("p.KLST23KWES Hello", Uniprot.getDescription(ft)); |
327 | ||
328 | 1 | ft.getVariation().clear(); |
329 | 1 | ft.getVariation().add("z"); // unknown variant - fails gracefully |
330 | 1 | ft.setOriginal("K"); |
331 | 1 | assertEquals("p.Lys23z Hello", Uniprot.getDescription(ft)); |
332 | ||
333 | 1 | ft.getVariation().clear(); // variant missing - is ignored |
334 | 1 | assertEquals("Hello", Uniprot.getDescription(ft)); |
335 | } | |
336 | ||
337 | public static String Q29079 = Q29079 = new String( | |
338 | "<uniprot xmlns=\"http://uniprot.org/uniprot\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://uniprot.org/uniprot http://www.uniprot.org/docs/uniprot.xsd\">\n" | |
339 | + "<entry xmlns=\"http://uniprot.org/uniprot\" dataset=\"Swiss-Prot\" created=\"1997-11-01\" modified=\"2023-09-13\" version=\"103\">\n" | |
340 | + "<accession>Q29079</accession>\n" | |
341 | + "<accession>Q29017</accession>\n" | |
342 | + "<name>PAG2_PIG</name>\n" + "<protein>\n" | |
343 | + "<recommendedName>\n" | |
344 | + "<fullName>Pregnancy-associated glycoprotein 2</fullName>\n" | |
345 | + "<shortName>PAG 2</shortName>\n" | |
346 | + "<ecNumber>3.4.23.-</ecNumber>\n" | |
347 | + "</recommendedName>\n" + "</protein>\n" + "<gene>\n" | |
348 | + "<name type=\"primary\">PAG2</name>\n" + "</gene>\n" | |
349 | + "<organism>\n" | |
350 | + "<name type=\"scientific\">Sus scrofa</name>\n" | |
351 | + "<name type=\"common\">Pig</name>\n" | |
352 | + "<dbReference type=\"NCBI Taxonomy\" id=\"9823\"/>\n" | |
353 | + "<lineage>\n" + "<taxon>Eukaryota</taxon>\n" | |
354 | + "<taxon>Metazoa</taxon>\n" + "<taxon>Chordata</taxon>\n" | |
355 | + "<taxon>Craniata</taxon>\n" | |
356 | + "<taxon>Vertebrata</taxon>\n" | |
357 | + "<taxon>Euteleostomi</taxon>\n" | |
358 | + "<taxon>Mammalia</taxon>\n" | |
359 | + "<taxon>Eutheria</taxon>\n" | |
360 | + "<taxon>Laurasiatheria</taxon>\n" | |
361 | + "<taxon>Artiodactyla</taxon>\n" | |
362 | + "<taxon>Suina</taxon>\n" + "<taxon>Suidae</taxon>\n" | |
363 | + "<taxon>Sus</taxon>\n" + "</lineage>\n" | |
364 | + "</organism>\n" + "<reference key=\"1\">\n" | |
365 | + "<citation type=\"journal article\" date=\"1995\" name=\"Biol. Reprod.\" volume=\"53\" first=\"21\" last=\"28\">\n" | |
366 | + "<title>Porcine pregnancy-associated glycoproteins: new members of the aspartic proteinase gene family expressed in trophectoderm.</title>\n" | |
367 | + "<authorList>\n" + "<person name=\"Szafranska B.\"/>\n" | |
368 | + "<person name=\"Xie S.\"/>\n" | |
369 | + "<person name=\"Green J.\"/>\n" | |
370 | + "<person name=\"Roberts R.M.\"/>\n" + "</authorList>\n" | |
371 | + "<dbReference type=\"PubMed\" id=\"7669851\"/>\n" | |
372 | + "<dbReference type=\"DOI\" id=\"10.1095/biolreprod53.1.21\"/>\n" | |
373 | + "</citation>\n" | |
374 | + "<scope>NUCLEOTIDE SEQUENCE [GENOMIC DNA]</scope>\n" | |
375 | + "</reference>\n" + "<reference key=\"2\">\n" | |
376 | + "<citation type=\"journal article\" date=\"2001\" name=\"Mol. Reprod. Dev.\" volume=\"60\" first=\"137\" last=\"146\">\n" | |
377 | + "<title>Gene for porcine pregnancy-associated glycoprotein 2 (poPAG2): its structural organization and analysis of its promoter.</title>\n" | |
378 | + "<authorList>\n" + "<person name=\"Szafranska B.\"/>\n" | |
379 | + "<person name=\"Miura R.\"/>\n" | |
380 | + "<person name=\"Ghosh D.\"/>\n" | |
381 | + "<person name=\"Ezashi T.\"/>\n" | |
382 | + "<person name=\"Xie S.\"/>\n" | |
383 | + "<person name=\"Roberts R.M.\"/>\n" | |
384 | + "<person name=\"Green J.A.\"/>\n" + "</authorList>\n" | |
385 | + "<dbReference type=\"PubMed\" id=\"11553911\"/>\n" | |
386 | + "<dbReference type=\"DOI\" id=\"10.1002/mrd.1070\"/>\n" | |
387 | + "</citation>\n" | |
388 | + "<scope>NUCLEOTIDE SEQUENCE [GENOMIC DNA]</scope>\n" | |
389 | + "<source>\n" + "<tissue>Placenta</tissue>\n" | |
390 | + "</source>\n" + "</reference>\n" | |
391 | + "<comment type=\"subcellular location\">\n" | |
392 | + "<subcellularLocation>\n" | |
393 | + "<location>Secreted</location>\n" | |
394 | + "<location>Extracellular space</location>\n" | |
395 | + "</subcellularLocation>\n" + "</comment>\n" | |
396 | + "<comment type=\"tissue specificity\">\n" | |
397 | + "<text>Expressed throughout the chorion, with the signal localized exclusively over the trophectoderm.</text>\n" | |
398 | + "</comment>\n" | |
399 | + "<comment type=\"developmental stage\">\n" | |
400 | + "<text>Expression was detected at day 15, coinciding with the beginning of implantation, and continued throughout gestation.</text>\n" | |
401 | + "</comment>\n" + "<comment type=\"similarity\">\n" | |
402 | + "<text evidence=\"5\">Belongs to the peptidase A1 family.</text>\n" | |
403 | + "</comment>\n" | |
404 | + "<dbReference type=\"EC\" id=\"3.4.23.-\"/>\n" | |
405 | + "<dbReference type=\"EMBL\" id=\"U39763\">\n" | |
406 | + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n" | |
407 | + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n" | |
408 | + "</dbReference>\n" | |
409 | + "<dbReference type=\"EMBL\" id=\"U41421\">\n" | |
410 | + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n" | |
411 | + "<property type=\"status\" value=\"JOINED\"/>\n" | |
412 | + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n" | |
413 | + "</dbReference>\n" | |
414 | + "<dbReference type=\"EMBL\" id=\"U41422\">\n" | |
415 | + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n" | |
416 | + "<property type=\"status\" value=\"JOINED\"/>\n" | |
417 | + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n" | |
418 | + "</dbReference>\n" | |
419 | + "<dbReference type=\"EMBL\" id=\"U39199\">\n" | |
420 | + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n" | |
421 | + "<property type=\"status\" value=\"JOINED\"/>\n" | |
422 | + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n" | |
423 | + "</dbReference>\n" | |
424 | + "<dbReference type=\"EMBL\" id=\"U41423\">\n" | |
425 | + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n" | |
426 | + "<property type=\"status\" value=\"JOINED\"/>\n" | |
427 | + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n" | |
428 | + "</dbReference>\n" | |
429 | + "<dbReference type=\"EMBL\" id=\"U41424\">\n" | |
430 | + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n" | |
431 | + "<property type=\"status\" value=\"JOINED\"/>\n" | |
432 | + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n" | |
433 | + "</dbReference>\n" | |
434 | + "<dbReference type=\"EMBL\" id=\"U39762\">\n" | |
435 | + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n" | |
436 | + "<property type=\"status\" value=\"JOINED\"/>\n" | |
437 | + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n" | |
438 | + "</dbReference>\n" | |
439 | + "<dbReference type=\"EMBL\" id=\"L34361\">\n" | |
440 | + "<property type=\"protein sequence ID\" value=\"AAA81531.1\"/>\n" | |
441 | + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n" | |
442 | + "</dbReference>\n" | |
443 | + "<dbReference type=\"PIR\" id=\"I46617\">\n" | |
444 | + "<property type=\"entry name\" value=\"I46617\"/>\n" | |
445 | + "</dbReference>\n" | |
446 | + "<dbReference type=\"AlphaFoldDB\" id=\"Q29079\"/>\n" | |
447 | + "<dbReference type=\"SMR\" id=\"Q29079\"/>\n" | |
448 | + "<dbReference type=\"MEROPS\" id=\"A01.051\"/>\n" | |
449 | + "<dbReference type=\"GlyCosmos\" id=\"Q29079\">\n" | |
450 | + "<property type=\"glycosylation\" value=\"2 sites, No reported glycans\"/>\n" | |
451 | + "</dbReference>\n" | |
452 | + "<dbReference type=\"InParanoid\" id=\"Q29079\"/>\n" | |
453 | + "<dbReference type=\"Proteomes\" id=\"UP000008227\">\n" | |
454 | + "<property type=\"component\" value=\"Unplaced\"/>\n" | |
455 | + "</dbReference>\n" | |
456 | + "<dbReference type=\"Proteomes\" id=\"UP000314985\">\n" | |
457 | + "<property type=\"component\" value=\"Unplaced\"/>\n" | |
458 | + "</dbReference>\n" | |
459 | + "<dbReference type=\"Proteomes\" id=\"UP000694570\">\n" | |
460 | + "<property type=\"component\" value=\"Unplaced\"/>\n" | |
461 | + "</dbReference>\n" | |
462 | + "<dbReference type=\"Proteomes\" id=\"UP000694571\">\n" | |
463 | + "<property type=\"component\" value=\"Unplaced\"/>\n" | |
464 | + "</dbReference>\n" | |
465 | + "<dbReference type=\"Proteomes\" id=\"UP000694720\">\n" | |
466 | + "<property type=\"component\" value=\"Unplaced\"/>\n" | |
467 | + "</dbReference>\n" | |
468 | + "<dbReference type=\"Proteomes\" id=\"UP000694722\">\n" | |
469 | + "<property type=\"component\" value=\"Unplaced\"/>\n" | |
470 | + "</dbReference>\n" | |
471 | + "<dbReference type=\"Proteomes\" id=\"UP000694723\">\n" | |
472 | + "<property type=\"component\" value=\"Unplaced\"/>\n" | |
473 | + "</dbReference>\n" | |
474 | + "<dbReference type=\"Proteomes\" id=\"UP000694724\">\n" | |
475 | + "<property type=\"component\" value=\"Unplaced\"/>\n" | |
476 | + "</dbReference>\n" | |
477 | + "<dbReference type=\"Proteomes\" id=\"UP000694725\">\n" | |
478 | + "<property type=\"component\" value=\"Unplaced\"/>\n" | |
479 | + "</dbReference>\n" | |
480 | + "<dbReference type=\"Proteomes\" id=\"UP000694726\">\n" | |
481 | + "<property type=\"component\" value=\"Unplaced\"/>\n" | |
482 | + "</dbReference>\n" | |
483 | + "<dbReference type=\"Proteomes\" id=\"UP000694727\">\n" | |
484 | + "<property type=\"component\" value=\"Unplaced\"/>\n" | |
485 | + "</dbReference>\n" | |
486 | + "<dbReference type=\"Proteomes\" id=\"UP000694728\">\n" | |
487 | + "<property type=\"component\" value=\"Unplaced\"/>\n" | |
488 | + "</dbReference>\n" | |
489 | + "<dbReference type=\"GO\" id=\"GO:0005615\">\n" | |
490 | + "<property type=\"term\" value=\"C:extracellular space\"/>\n" | |
491 | + "<property type=\"evidence\" value=\"ECO:0007669\"/>\n" | |
492 | + "<property type=\"project\" value=\"UniProtKB-SubCell\"/>\n" | |
493 | + "</dbReference>\n" | |
494 | + "<dbReference type=\"GO\" id=\"GO:0004190\">\n" | |
495 | + "<property type=\"term\" value=\"F:aspartic-type endopeptidase activity\"/>\n" | |
496 | + "<property type=\"evidence\" value=\"ECO:0000318\"/>\n" | |
497 | + "<property type=\"project\" value=\"GO_Central\"/>\n" | |
498 | + "</dbReference>\n" | |
499 | + "<dbReference type=\"GO\" id=\"GO:0006508\">\n" | |
500 | + "<property type=\"term\" value=\"P:proteolysis\"/>\n" | |
501 | + "<property type=\"evidence\" value=\"ECO:0000318\"/>\n" | |
502 | + "<property type=\"project\" value=\"GO_Central\"/>\n" | |
503 | + "</dbReference>\n" | |
504 | + "<dbReference type=\"Gene3D\" id=\"6.10.140.60\">\n" | |
505 | + "<property type=\"match status\" value=\"1\"/>\n" | |
506 | + "</dbReference>\n" | |
507 | + "<dbReference type=\"Gene3D\" id=\"2.40.70.10\">\n" | |
508 | + "<property type=\"entry name\" value=\"Acid Proteases\"/>\n" | |
509 | + "<property type=\"match status\" value=\"3\"/>\n" | |
510 | + "</dbReference>\n" | |
511 | + "<dbReference type=\"InterPro\" id=\"IPR001461\">\n" | |
512 | + "<property type=\"entry name\" value=\"Aspartic_peptidase_A1\"/>\n" | |
513 | + "</dbReference>\n" | |
514 | + "<dbReference type=\"InterPro\" id=\"IPR001969\">\n" | |
515 | + "<property type=\"entry name\" value=\"Aspartic_peptidase_AS\"/>\n" | |
516 | + "</dbReference>\n" | |
517 | + "<dbReference type=\"InterPro\" id=\"IPR012848\">\n" | |
518 | + "<property type=\"entry name\" value=\"Aspartic_peptidase_N\"/>\n" | |
519 | + "</dbReference>\n" | |
520 | + "<dbReference type=\"InterPro\" id=\"IPR033121\">\n" | |
521 | + "<property type=\"entry name\" value=\"PEPTIDASE_A1\"/>\n" | |
522 | + "</dbReference>\n" | |
523 | + "<dbReference type=\"InterPro\" id=\"IPR021109\">\n" | |
524 | + "<property type=\"entry name\" value=\"Peptidase_aspartic_dom_sf\"/>\n" | |
525 | + "</dbReference>\n" | |
526 | + "<dbReference type=\"PANTHER\" id=\"PTHR47966\">\n" | |
527 | + "<property type=\"entry name\" value=\"BETA-SITE APP-CLEAVING ENZYME, ISOFORM A-RELATED\"/>\n" | |
528 | + "<property type=\"match status\" value=\"1\"/>\n" | |
529 | + "</dbReference>\n" | |
530 | + "<dbReference type=\"PANTHER\" id=\"PTHR47966:SF49\">\n" | |
531 | + "<property type=\"entry name\" value=\"PEPSIN A-5\"/>\n" | |
532 | + "<property type=\"match status\" value=\"1\"/>\n" | |
533 | + "</dbReference>\n" | |
534 | + "<dbReference type=\"Pfam\" id=\"PF07966\">\n" | |
535 | + "<property type=\"entry name\" value=\"A1_Propeptide\"/>\n" | |
536 | + "<property type=\"match status\" value=\"1\"/>\n" | |
537 | + "</dbReference>\n" | |
538 | + "<dbReference type=\"Pfam\" id=\"PF00026\">\n" | |
539 | + "<property type=\"entry name\" value=\"Asp\"/>\n" | |
540 | + "<property type=\"match status\" value=\"2\"/>\n" | |
541 | + "</dbReference>\n" | |
542 | + "<dbReference type=\"PRINTS\" id=\"PR00792\">\n" | |
543 | + "<property type=\"entry name\" value=\"PEPSIN\"/>\n" | |
544 | + "</dbReference>\n" | |
545 | + "<dbReference type=\"SUPFAM\" id=\"SSF50630\">\n" | |
546 | + "<property type=\"entry name\" value=\"Acid proteases\"/>\n" | |
547 | + "<property type=\"match status\" value=\"2\"/>\n" | |
548 | + "</dbReference>\n" | |
549 | + "<dbReference type=\"PROSITE\" id=\"PS00141\">\n" | |
550 | + "<property type=\"entry name\" value=\"ASP_PROTEASE\"/>\n" | |
551 | + "<property type=\"match status\" value=\"2\"/>\n" | |
552 | + "</dbReference>\n" | |
553 | + "<dbReference type=\"PROSITE\" id=\"PS51767\">\n" | |
554 | + "<property type=\"entry name\" value=\"PEPTIDASE_A1\"/>\n" | |
555 | + "<property type=\"match status\" value=\"1\"/>\n" | |
556 | + "</dbReference>\n" | |
557 | + "<proteinExistence type=\"evidence at transcript level\"/>\n" | |
558 | + "<keyword id=\"KW-0064\">Aspartyl protease</keyword>\n" | |
559 | + "<keyword id=\"KW-1015\">Disulfide bond</keyword>\n" | |
560 | + "<keyword id=\"KW-0325\">Glycoprotein</keyword>\n" | |
561 | + "<keyword id=\"KW-0378\">Hydrolase</keyword>\n" | |
562 | + "<keyword id=\"KW-0645\">Protease</keyword>\n" | |
563 | + "<keyword id=\"KW-1185\">Reference proteome</keyword>\n" | |
564 | + "<keyword id=\"KW-0964\">Secreted</keyword>\n" | |
565 | + "<keyword id=\"KW-0732\">Signal</keyword>\n" | |
566 | + "<keyword id=\"KW-0865\">Zymogen</keyword>\n" | |
567 | + "<feature type=\"signal peptide\" evidence=\"2\">\n" | |
568 | + "<location>\n" + "<begin position=\"1\"/>\n" | |
569 | + "<end position=\"15\"/>\n" + "</location>\n" | |
570 | + "</feature>\n" | |
571 | + "<feature type=\"propeptide\" id=\"PRO_0000026107\" description=\"Activation peptide\" evidence=\"2\">\n" | |
572 | + "<location>\n" + "<begin position=\"16\"/>\n" | |
573 | + "<end status=\"unknown\"/>\n" + "</location>\n" | |
574 | + "</feature>\n" | |
575 | + "<feature type=\"chain\" id=\"PRO_0000026108\" description=\"Pregnancy-associated glycoprotein 2\">\n" | |
576 | + "<location>\n" + "<begin status=\"unknown\"/>\n" | |
577 | + "<end position=\"420\"/>\n" + "</location>\n" | |
578 | + "</feature>\n" | |
579 | + "<feature type=\"domain\" description=\"Peptidase A1\" evidence=\"3\">\n" | |
580 | + "<location>\n" + "<begin position=\"76\"/>\n" | |
581 | + "<end position=\"417\"/>\n" + "</location>\n" | |
582 | + "</feature>\n" | |
583 | + "<feature type=\"active site\" evidence=\"4\">\n" | |
584 | + "<location>\n" + "<position position=\"94\"/>\n" | |
585 | + "</location>\n" + "</feature>\n" | |
586 | + "<feature type=\"active site\" evidence=\"4\">\n" | |
587 | + "<location>\n" + "<position position=\"277\"/>\n" | |
588 | + "</location>\n" + "</feature>\n" | |
589 | + "<feature type=\"glycosylation site\" description=\"N-linked (GlcNAc...) asparagine\" evidence=\"2\">\n" | |
590 | + "<location>\n" + "<position position=\"56\"/>\n" | |
591 | + "</location>\n" + "</feature>\n" | |
592 | + "<feature type=\"glycosylation site\" description=\"N-linked (GlcNAc...) asparagine\" evidence=\"2\">\n" | |
593 | + "<location>\n" + "<position position=\"79\"/>\n" | |
594 | + "</location>\n" + "</feature>\n" | |
595 | + "<feature type=\"disulfide bond\" evidence=\"1\">\n" | |
596 | + "<location>\n" + "<begin position=\"107\"/>\n" | |
597 | + "<end position=\"112\"/>\n" + "</location>\n" | |
598 | + "</feature>\n" | |
599 | + "<feature type=\"disulfide bond\" evidence=\"1\">\n" | |
600 | + "<location>\n" + "<begin position=\"268\"/>\n" | |
601 | + "<end position=\"272\"/>\n" + "</location>\n" | |
602 | + "</feature>\n" | |
603 | + "<feature type=\"disulfide bond\" evidence=\"1\">\n" | |
604 | + "<location>\n" + "<begin position=\"341\"/>\n" | |
605 | + "<end position=\"376\"/>\n" + "</location>\n" | |
606 | + "</feature>\n" | |
607 | + "<feature type=\"sequence conflict\" description=\"In Ref. 1.\" evidence=\"5\" ref=\"1\">\n" | |
608 | + "<location>\n" + "<begin position=\"335\"/>\n" | |
609 | + "<end position=\"367\"/>\n" + "</location>\n" | |
610 | + "</feature>\n" | |
611 | + "<evidence type=\"ECO:0000250\" key=\"1\"/>\n" | |
612 | + "<evidence type=\"ECO:0000255\" key=\"2\"/>\n" | |
613 | + "<evidence type=\"ECO:0000255\" key=\"3\">\n" | |
614 | + "<source>\n" | |
615 | + "<dbReference type=\"PROSITE-ProRule\" id=\"PRU01103\"/>\n" | |
616 | + "</source>\n" + "</evidence>\n" | |
617 | + "<evidence type=\"ECO:0000255\" key=\"4\">\n" | |
618 | + "<source>\n" | |
619 | + "<dbReference type=\"PROSITE-ProRule\" id=\"PRU10094\"/>\n" | |
620 | + "</source>\n" + "</evidence>\n" | |
621 | + "<evidence type=\"ECO:0000305\" key=\"5\"/>\n" | |
622 | + "<sequence length=\"420\" mass=\"47132\" checksum=\"094153B6C1B1FCDB\" modified=\"1997-11-01\" version=\"1\" precursor=\"true\">MKWLVILGLVALSDCLVMIPLTKVKSVRESLREKGLLKNFLKEHPYNMIQNLLSKNSSHVQKFSYQPLRNYLDMVYVGNISIGTPPQQFSVVFDTGSSDLWVPSIYCKSKACVTHRSFNPSHSSTFHDRGKSIKLEYGSGKMSGFLGQDTVRIGQLTSTGQAFGLSKEETGKAFEHAIFDGILGLAYPSIAIKGTTTVIDNLKKQDQISEPVFAFYLSSDKEEGSVVMFGGVDKKYYKGDLKWVPLTQTSYWQIALDRITCRGRVIGCPRGCQAIVDTGTSMLHGPSKAVAKIHSLIKHFEKEYVVPCNARKALPDIVFTINNVDYPVPAQAYIRKYVVPCNARKALPDIVFTINNVDYPVPAQAYIRKNANNNRCYSTFEDIMDTLNQREIWILGDVFLRLYFTVYDEGQNRIGLAQAT</sequence>\n" | |
623 | + "</entry>\n" | |
624 | + "<copyright> Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms Distributed under the Creative Commons Attribution (CC BY 4.0) License </copyright>\n" | |
625 | + "</uniprot>"); | |
626 | ||
627 | 0 | @DataProvider |
628 | public Object[][] problemEntries() | |
629 | { | |
630 | 0 | return new Object[][] { new Object[] { Q29079 } }; |
631 | } | |
632 | ||
633 | 1 | @Test(groups = "Functional", dataProvider = "problemEntries") |
634 | public SequenceI testimportOfProblemEntries(String entry) | |
635 | { | |
636 | 1 | Uniprot u = new Uniprot(); |
637 | 1 | InputStream is = new ByteArrayInputStream(entry.getBytes()); |
638 | 1 | List<Entry> entries = u.getUniprotEntries(is); |
639 | 1 | assertEquals(1, entries.size()); |
640 | 1 | SequenceI sq = u.uniprotEntryToSequence(entries.get(0)); |
641 | 1 | assertNotNull(sq); |
642 | 1 | return sq; |
643 | } | |
644 | ||
645 | 1 | @Test(groups = "Functional") |
646 | public void checkIndefiniteSequenceFeatures() | |
647 | { | |
648 | 1 | SequenceI upseq = testimportOfProblemEntries(Q29079); |
649 | 1 | List<SequenceFeature> sf = upseq.getFeatures() |
650 | .getPositionalFeatures("chain"); | |
651 | 1 | assertNotNull(sf); |
652 | 1 | assertTrue(sf.size() == 1); |
653 | 1 | SequenceFeature chainFeaure = sf.get(0); |
654 | 1 | assertTrue(chainFeaure.getBegin() == 1); |
655 | 1 | assertTrue(chainFeaure.getEnd() == upseq.getEnd()); |
656 | 1 | assertNotNull(chainFeaure.getValueAsString("start_status")); |
657 | 1 | assertNull(chainFeaure.getValueAsString("end_status")); |
658 | 1 | assertTrue( |
659 | "unknown".equals(chainFeaure.getValueAsString("start_status"))); | |
660 | } | |
661 | } |