1. Project Clover database Wed Nov 13 2024 18:27:33 GMT
  2. Package jalview.ws.dbsources

File UniprotTest.java

 

Code metrics

0
159
9
1
661
556
9
0.06
17.67
9
1

Classes

Class
Line #
Actions
UniprotTest 52 159 9
0.9880952 98.8%
 

Contributing tests

This file is covered by 6 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.ws.dbsources;
22   
import static org.testng.Assert.assertFalse;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertNotNull;
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertTrue;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.List;

import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.gui.JvOptionPane;
import jalview.util.DBRefUtils;
import jalview.xml.binding.uniprot.DbReferenceType;
import jalview.xml.binding.uniprot.Entry;
import jalview.xml.binding.uniprot.FeatureType;
import jalview.xml.binding.uniprot.LocationType;
import jalview.xml.binding.uniprot.PositionType;
51   
 
52    public class UniprotTest
53    {
54   
 
55  1 toggle @BeforeClass(alwaysRun = true)
56    public void setUpJvOptionPane()
57    {
58  1 JvOptionPane.setInteractiveMode(false);
59  1 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
60    }
61   
62    // adapted from http://www.uniprot.org/uniprot/A9CKP4.xml
63    private static final String UNIPROT_XML = "<?xml version='1.0' encoding='UTF-8'?>"
64    + "<uniprot xmlns=\"http://uniprot.org/uniprot\">"
65    + "<entry dataset=\"TrEMBL\" created=\"2008-01-15\" modified=\"2015-03-04\" version=\"38\">"
66    + "<accession>A9CKP4</accession>"
67    + "<accession>A9CKP5</accession>" + "<name>A9CKP4_AGRT5</name>"
68    + "<name>A9CKP4_AGRT6</name>"
69    + "<protein><recommendedName><fullName>Mitogen-activated protein kinase 13</fullName></recommendedName></protein>"
70    + "<dbReference type=\"PDB\" id=\"2FSQ\"><property type=\"method\" value=\"X-ray\"/><property type=\"resolution\" value=\"1.40\"/></dbReference>"
71    + "<dbReference type=\"PDBsum\" id=\"2FSR\"/>"
72    + "<dbReference type=\"EMBL\" id=\"AE007869\"><property type=\"protein sequence ID\" value=\"AAK85932.1\"/><property type=\"molecule type\" value=\"Genomic_DNA\"/></dbReference>"
73    + "<feature type=\"signal peptide\" evidence=\"7\"><location><begin position=\"1\"/><end position=\"18\"/></location></feature>"
74    + "<feature type=\"propeptide\" description=\"Activation peptide\" id=\"PRO_0000027399\" evidence=\"9 16 17 18\"><location><begin position=\"19\"/><end position=\"20\"/></location></feature>"
75    + "<feature type=\"chain\" description=\"Granzyme B\" id=\"PRO_0000027400\"><location><begin position=\"21\"/><end position=\"247\"/></location></feature>"
76    + "<feature type=\"sequence variant\"><original>M</original><variation>L</variation><location><position position=\"41\"/></location></feature>"
77    + "<feature type=\"sequence variant\" description=\"Pathogenic\"><original>M</original><variation>L</variation><location><position position=\"41\"/></location></feature>"
78    + "<feature type=\"sequence variant\" description=\"Pathogenic\"><original>M</original><location><position position=\"41\"/></location></feature>"
79    + "<feature type=\"sequence variant\" description=\"Foo\"><variation>L</variation><variation>LMV</variation><original>M</original><location><position position=\"42\"/></location></feature>"
80    + "<feature type=\"sequence variant\" description=\"Foo\"><variation>LL</variation><variation>LMV</variation><original>ML</original><location><begin position=\"42\"/><end position=\"43\"/></location></feature>"
81    + "<feature type=\"sequence variant\" description=\"Foo Too\"><variation>LL</variation><variation>LMVK</variation><original>MLML</original><location><begin position=\"42\"/><end position=\"45\"/></location></feature>"
82    + "<sequence length=\"10\" mass=\"27410\" checksum=\"8CB760AACF88FE6C\" modified=\"2008-01-15\" version=\"1\">MHAPL VSKDL</sequence></entry>"
83    + "</uniprot>";
84   
85    /**
86    * Test the method that unmarshals XML to a Uniprot model
87    *
88    * @throws UnsupportedEncodingException
89    */
 
90  1 toggle @Test(groups = { "Functional" })
91    public void testGetUniprotEntries() throws UnsupportedEncodingException
92    {
93  1 Uniprot u = new Uniprot();
94  1 InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
95  1 List<Entry> entries = u.getUniprotEntries(is);
96  1 assertEquals(1, entries.size());
97  1 Entry entry = entries.get(0);
98  1 assertEquals(2, entry.getName().size());
99  1 assertEquals("A9CKP4_AGRT5", entry.getName().get(0));
100  1 assertEquals("A9CKP4_AGRT6", entry.getName().get(1));
101  1 assertEquals(2, entry.getAccession().size());
102  1 assertEquals("A9CKP4", entry.getAccession().get(0));
103  1 assertEquals("A9CKP5", entry.getAccession().get(1));
104   
105  1 assertEquals("MHAPL VSKDL", entry.getSequence().getValue());
106   
107  1 assertEquals("Mitogen-activated protein kinase 13", entry.getProtein()
108    .getRecommendedName().getFullName().getValue());
109   
110    /*
111    * Check sequence features
112    */
113  1 List<FeatureType> features = entry.getFeature();
114  1 assertEquals(9, features.size());
115  1 FeatureType sf = features.get(0);
116  1 assertEquals("signal peptide", sf.getType());
117  1 assertNull(sf.getDescription());
118  1 assertNull(sf.getStatus());
119  1 assertNull(sf.getLocation().getPosition());
120  1 assertEquals(1, sf.getLocation().getBegin().getPosition().intValue());
121  1 assertEquals(18, sf.getLocation().getEnd().getPosition().intValue());
122  1 sf = features.get(1);
123  1 assertEquals("propeptide", sf.getType());
124  1 assertEquals("Activation peptide", sf.getDescription());
125  1 assertNull(sf.getLocation().getPosition());
126  1 assertEquals(19, sf.getLocation().getBegin().getPosition().intValue());
127  1 assertEquals(20, sf.getLocation().getEnd().getPosition().intValue());
128  1 sf = features.get(2);
129  1 assertEquals("chain", sf.getType());
130  1 assertEquals("Granzyme B", sf.getDescription());
131  1 assertNull(sf.getLocation().getPosition());
132  1 assertEquals(21, sf.getLocation().getBegin().getPosition().intValue());
133  1 assertEquals(247, sf.getLocation().getEnd().getPosition().intValue());
134   
135  1 sf = features.get(3);
136  1 assertEquals("sequence variant", sf.getType());
137  1 assertNull(sf.getDescription());
138  1 assertEquals(41,
139    sf.getLocation().getPosition().getPosition().intValue());
140  1 assertNull(sf.getLocation().getBegin());
141  1 assertNull(sf.getLocation().getEnd());
142   
143  1 sf = features.get(4);
144  1 assertEquals("sequence variant", sf.getType());
145  1 assertEquals("Pathogenic", sf.getDescription());
146  1 assertEquals(41,
147    sf.getLocation().getPosition().getPosition().intValue());
148  1 assertNull(sf.getLocation().getBegin());
149  1 assertNull(sf.getLocation().getEnd());
150   
151  1 sf = features.get(5);
152  1 assertEquals("sequence variant", sf.getType());
153  1 assertEquals("Pathogenic", sf.getDescription());
154  1 assertEquals(41,
155    sf.getLocation().getPosition().getPosition().intValue());
156  1 assertNull(sf.getLocation().getBegin());
157  1 assertNull(sf.getLocation().getEnd());
158   
159  1 sf = features.get(6);
160  1 assertEquals("sequence variant", sf.getType());
161  1 assertEquals("Foo", sf.getDescription());
162  1 assertEquals(42,
163    sf.getLocation().getPosition().getPosition().intValue());
164  1 assertNull(sf.getLocation().getBegin());
165  1 assertNull(sf.getLocation().getEnd());
166  1 Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.Met42Leu"
167    + "<br/>&nbsp;&nbsp;" + "p.Met42LeuMetVal Foo</html>");
168   
169  1 sf = features.get(7);
170  1 assertNull(sf.getLocation().getPosition());
171  1 assertEquals(42, sf.getLocation().getBegin().getPosition().intValue());
172  1 assertEquals(43, sf.getLocation().getEnd().getPosition().intValue());
173  1 Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.MetLeu42LeuLeu"
174    + "<br/>&nbsp;&nbsp;" + "p.MetLeu42LeuMetVal Foo</html>");
175   
176  1 sf = features.get(8);
177  1 assertNull(sf.getLocation().getPosition());
178  1 assertEquals(42, sf.getLocation().getBegin().getPosition().intValue());
179  1 assertEquals(45, sf.getLocation().getEnd().getPosition().intValue());
180  1 Assert.assertEquals(Uniprot.getDescription(sf), "<html>p.MLML42LeuLeu"
181    + "<br/>&nbsp;&nbsp;" + "p.MLML42LMVK Foo Too</html>");
182   
183    /*
184    * Check cross-references
185    */
186  1 List<DbReferenceType> xrefs = entry.getDbReference();
187  1 assertEquals(3, xrefs.size());
188   
189  1 DbReferenceType xref = xrefs.get(0);
190  1 assertEquals("2FSQ", xref.getId());
191  1 assertEquals("PDB", xref.getType());
192  1 assertEquals("X-ray",
193    Uniprot.getProperty(xref.getProperty(), "method"));
194  1 assertEquals("1.40",
195    Uniprot.getProperty(xref.getProperty(), "resolution"));
196   
197  1 xref = xrefs.get(1);
198  1 assertEquals("2FSR", xref.getId());
199  1 assertEquals("PDBsum", xref.getType());
200  1 assertTrue(xref.getProperty().isEmpty());
201   
202  1 xref = xrefs.get(2);
203  1 assertEquals("AE007869", xref.getId());
204  1 assertEquals("EMBL", xref.getType());
205  1 assertEquals("AAK85932.1",
206    Uniprot.getProperty(xref.getProperty(), "protein sequence ID"));
207  1 assertEquals("Genomic_DNA",
208    Uniprot.getProperty(xref.getProperty(), "molecule type"));
209    }
210   
 
211  1 toggle @Test(groups = { "Functional" })
212    public void testGetUniprotSequence() throws UnsupportedEncodingException
213    {
214  1 InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
215  1 Entry entry = new Uniprot().getUniprotEntries(is).get(0);
216  1 SequenceI seq = new Uniprot().uniprotEntryToSequence(entry);
217  1 assertNotNull(seq);
218  1 assertEquals(6, seq.getDBRefs().size()); // 2*Uniprot, PDB, PDBsum, 2*EMBL
219  1 assertEquals(seq.getSequenceAsString(),
220    seq.createDatasetSequence().getSequenceAsString());
221  1 assertEquals(2, seq.getPrimaryDBRefs().size());
222  1 List<DBRefEntry> res = DBRefUtils.searchRefs(seq.getPrimaryDBRefs(),
223    "A9CKP4");
224  1 assertEquals(1, res.size());
225  1 assertTrue(res.get(0).isCanonical());
226  1 res = DBRefUtils.searchRefsForSource(seq.getDBRefs(),
227    DBRefSource.UNIPROT);
228  1 assertEquals(2, res.size());
229    /*
230    * NB this test fragile - relies on ordering being preserved
231    */
232  1 assertTrue(res.get(0).isCanonical());
233  1 assertFalse(res.get(1).isCanonical());
234   
235    // check version is preserved for EMBLCDS
236  1 res = DBRefUtils.searchRefs(seq.getDBRefs(), "AAK85932");
237  1 assertEquals(1, res.size());
238    // Ideally we would expect AAK85932.1 -> AAK85932
239    // assertTrue("1".equals(res.get(0).getVersion()));
240    // but it also passes through DBrefUtils.ensurePrimaries which adds
241    // (promoted) to the version string
242    // FIXME: Jim needs to specify what (promoted) means !! - or perhaps we just
243    // ignore it !
244  1 assertEquals("1 (promoted)", (res.get(0).getVersion()));
245   
246  1 List<SequenceFeature> features = seq.getFeatures().findFeatures(41, 41,
247    "sequence variant");
248    // verify single position features are parsed correctly JAL-4347
249  1 assertNotNull(features);
250  1 assertEquals(3, features.size());
251   
252    }
253   
254    /**
255    * Test the method that formats the sequence id
256    *
257    * @throws UnsupportedEncodingException
258    */
 
259  1 toggle @Test(groups = { "Functional" })
260    public void testGetUniprotEntryId() throws UnsupportedEncodingException
261    {
262  1 InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
263  1 Entry entry = new Uniprot().getUniprotEntries(is).get(0);
264   
265    /*
266    * name formatted with Uniprot Entry name
267    */
268  1 String expectedName = "A9CKP4_AGRT5|A9CKP4_AGRT6";
269  1 assertEquals(expectedName, Uniprot.getUniprotEntryId(entry));
270    }
271   
272    /**
273    * Test the method that formats the sequence description
274    *
275    * @throws UnsupportedEncodingException
276    */
 
277  1 toggle @Test(groups = { "Functional" })
278    public void testGetUniprotEntryDescription()
279    throws UnsupportedEncodingException
280    {
281  1 InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes());
282  1 Entry entry = new Uniprot().getUniprotEntries(is).get(0);
283   
284  1 assertEquals("Mitogen-activated protein kinase 13",
285    Uniprot.getUniprotEntryDescription(entry));
286    }
287   
 
288  1 toggle @Test(groups = { "Functional" })
289    public void testGetDescription()
290    {
291  1 FeatureType ft = new FeatureType();
292  1 assertEquals("", Uniprot.getDescription(ft));
293   
294  1 ft.setDescription("Hello");
295  1 assertEquals("Hello", Uniprot.getDescription(ft));
296   
297  1 ft.setLocation(new LocationType());
298  1 ft.getLocation().setPosition(new PositionType());
299  1 ft.getLocation().getPosition().setPosition(BigInteger.valueOf(23));
300  1 ft.setOriginal("K");
301  1 ft.getVariation().add("y");
302  1 assertEquals("p.Lys23Tyr Hello", Uniprot.getDescription(ft));
303   
304    // multiple variants generate an html description over more than one line
305  1 ft.getVariation().add("W");
306  1 assertEquals("<html>p.Lys23Tyr<br/>&nbsp;&nbsp;p.Lys23Trp Hello</html>",
307    Uniprot.getDescription(ft));
308   
309    /*
310    * indel cases
311    * up to 3 bases (original or variant) are shown using 3 letter code
312    */
313  1 ft.getVariation().clear();
314  1 ft.getVariation().add("KWE");
315  1 ft.setOriginal("KLS");
316  1 assertEquals("p.LysLeuSer23LysTrpGlu Hello",
317    Uniprot.getDescription(ft));
318   
319    // adding a fourth original base switches to single letter code
320  1 ft.setOriginal("KLST");
321  1 assertEquals("p.KLST23LysTrpGlu Hello", Uniprot.getDescription(ft));
322   
323    // adding a fourth variant switches to single letter code
324  1 ft.getVariation().clear();
325  1 ft.getVariation().add("KWES");
326  1 assertEquals("p.KLST23KWES Hello", Uniprot.getDescription(ft));
327   
328  1 ft.getVariation().clear();
329  1 ft.getVariation().add("z"); // unknown variant - fails gracefully
330  1 ft.setOriginal("K");
331  1 assertEquals("p.Lys23z Hello", Uniprot.getDescription(ft));
332   
333  1 ft.getVariation().clear(); // variant missing - is ignored
334  1 assertEquals("Hello", Uniprot.getDescription(ft));
335    }
336   
337    public static String Q29079 = Q29079 = new String(
338    "<uniprot xmlns=\"http://uniprot.org/uniprot\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://uniprot.org/uniprot http://www.uniprot.org/docs/uniprot.xsd\">\n"
339    + "<entry xmlns=\"http://uniprot.org/uniprot\" dataset=\"Swiss-Prot\" created=\"1997-11-01\" modified=\"2023-09-13\" version=\"103\">\n"
340    + "<accession>Q29079</accession>\n"
341    + "<accession>Q29017</accession>\n"
342    + "<name>PAG2_PIG</name>\n" + "<protein>\n"
343    + "<recommendedName>\n"
344    + "<fullName>Pregnancy-associated glycoprotein 2</fullName>\n"
345    + "<shortName>PAG 2</shortName>\n"
346    + "<ecNumber>3.4.23.-</ecNumber>\n"
347    + "</recommendedName>\n" + "</protein>\n" + "<gene>\n"
348    + "<name type=\"primary\">PAG2</name>\n" + "</gene>\n"
349    + "<organism>\n"
350    + "<name type=\"scientific\">Sus scrofa</name>\n"
351    + "<name type=\"common\">Pig</name>\n"
352    + "<dbReference type=\"NCBI Taxonomy\" id=\"9823\"/>\n"
353    + "<lineage>\n" + "<taxon>Eukaryota</taxon>\n"
354    + "<taxon>Metazoa</taxon>\n" + "<taxon>Chordata</taxon>\n"
355    + "<taxon>Craniata</taxon>\n"
356    + "<taxon>Vertebrata</taxon>\n"
357    + "<taxon>Euteleostomi</taxon>\n"
358    + "<taxon>Mammalia</taxon>\n"
359    + "<taxon>Eutheria</taxon>\n"
360    + "<taxon>Laurasiatheria</taxon>\n"
361    + "<taxon>Artiodactyla</taxon>\n"
362    + "<taxon>Suina</taxon>\n" + "<taxon>Suidae</taxon>\n"
363    + "<taxon>Sus</taxon>\n" + "</lineage>\n"
364    + "</organism>\n" + "<reference key=\"1\">\n"
365    + "<citation type=\"journal article\" date=\"1995\" name=\"Biol. Reprod.\" volume=\"53\" first=\"21\" last=\"28\">\n"
366    + "<title>Porcine pregnancy-associated glycoproteins: new members of the aspartic proteinase gene family expressed in trophectoderm.</title>\n"
367    + "<authorList>\n" + "<person name=\"Szafranska B.\"/>\n"
368    + "<person name=\"Xie S.\"/>\n"
369    + "<person name=\"Green J.\"/>\n"
370    + "<person name=\"Roberts R.M.\"/>\n" + "</authorList>\n"
371    + "<dbReference type=\"PubMed\" id=\"7669851\"/>\n"
372    + "<dbReference type=\"DOI\" id=\"10.1095/biolreprod53.1.21\"/>\n"
373    + "</citation>\n"
374    + "<scope>NUCLEOTIDE SEQUENCE [GENOMIC DNA]</scope>\n"
375    + "</reference>\n" + "<reference key=\"2\">\n"
376    + "<citation type=\"journal article\" date=\"2001\" name=\"Mol. Reprod. Dev.\" volume=\"60\" first=\"137\" last=\"146\">\n"
377    + "<title>Gene for porcine pregnancy-associated glycoprotein 2 (poPAG2): its structural organization and analysis of its promoter.</title>\n"
378    + "<authorList>\n" + "<person name=\"Szafranska B.\"/>\n"
379    + "<person name=\"Miura R.\"/>\n"
380    + "<person name=\"Ghosh D.\"/>\n"
381    + "<person name=\"Ezashi T.\"/>\n"
382    + "<person name=\"Xie S.\"/>\n"
383    + "<person name=\"Roberts R.M.\"/>\n"
384    + "<person name=\"Green J.A.\"/>\n" + "</authorList>\n"
385    + "<dbReference type=\"PubMed\" id=\"11553911\"/>\n"
386    + "<dbReference type=\"DOI\" id=\"10.1002/mrd.1070\"/>\n"
387    + "</citation>\n"
388    + "<scope>NUCLEOTIDE SEQUENCE [GENOMIC DNA]</scope>\n"
389    + "<source>\n" + "<tissue>Placenta</tissue>\n"
390    + "</source>\n" + "</reference>\n"
391    + "<comment type=\"subcellular location\">\n"
392    + "<subcellularLocation>\n"
393    + "<location>Secreted</location>\n"
394    + "<location>Extracellular space</location>\n"
395    + "</subcellularLocation>\n" + "</comment>\n"
396    + "<comment type=\"tissue specificity\">\n"
397    + "<text>Expressed throughout the chorion, with the signal localized exclusively over the trophectoderm.</text>\n"
398    + "</comment>\n"
399    + "<comment type=\"developmental stage\">\n"
400    + "<text>Expression was detected at day 15, coinciding with the beginning of implantation, and continued throughout gestation.</text>\n"
401    + "</comment>\n" + "<comment type=\"similarity\">\n"
402    + "<text evidence=\"5\">Belongs to the peptidase A1 family.</text>\n"
403    + "</comment>\n"
404    + "<dbReference type=\"EC\" id=\"3.4.23.-\"/>\n"
405    + "<dbReference type=\"EMBL\" id=\"U39763\">\n"
406    + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
407    + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
408    + "</dbReference>\n"
409    + "<dbReference type=\"EMBL\" id=\"U41421\">\n"
410    + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
411    + "<property type=\"status\" value=\"JOINED\"/>\n"
412    + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
413    + "</dbReference>\n"
414    + "<dbReference type=\"EMBL\" id=\"U41422\">\n"
415    + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
416    + "<property type=\"status\" value=\"JOINED\"/>\n"
417    + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
418    + "</dbReference>\n"
419    + "<dbReference type=\"EMBL\" id=\"U39199\">\n"
420    + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
421    + "<property type=\"status\" value=\"JOINED\"/>\n"
422    + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
423    + "</dbReference>\n"
424    + "<dbReference type=\"EMBL\" id=\"U41423\">\n"
425    + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
426    + "<property type=\"status\" value=\"JOINED\"/>\n"
427    + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
428    + "</dbReference>\n"
429    + "<dbReference type=\"EMBL\" id=\"U41424\">\n"
430    + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
431    + "<property type=\"status\" value=\"JOINED\"/>\n"
432    + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
433    + "</dbReference>\n"
434    + "<dbReference type=\"EMBL\" id=\"U39762\">\n"
435    + "<property type=\"protein sequence ID\" value=\"AAA92055.1\"/>\n"
436    + "<property type=\"status\" value=\"JOINED\"/>\n"
437    + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
438    + "</dbReference>\n"
439    + "<dbReference type=\"EMBL\" id=\"L34361\">\n"
440    + "<property type=\"protein sequence ID\" value=\"AAA81531.1\"/>\n"
441    + "<property type=\"molecule type\" value=\"Genomic_DNA\"/>\n"
442    + "</dbReference>\n"
443    + "<dbReference type=\"PIR\" id=\"I46617\">\n"
444    + "<property type=\"entry name\" value=\"I46617\"/>\n"
445    + "</dbReference>\n"
446    + "<dbReference type=\"AlphaFoldDB\" id=\"Q29079\"/>\n"
447    + "<dbReference type=\"SMR\" id=\"Q29079\"/>\n"
448    + "<dbReference type=\"MEROPS\" id=\"A01.051\"/>\n"
449    + "<dbReference type=\"GlyCosmos\" id=\"Q29079\">\n"
450    + "<property type=\"glycosylation\" value=\"2 sites, No reported glycans\"/>\n"
451    + "</dbReference>\n"
452    + "<dbReference type=\"InParanoid\" id=\"Q29079\"/>\n"
453    + "<dbReference type=\"Proteomes\" id=\"UP000008227\">\n"
454    + "<property type=\"component\" value=\"Unplaced\"/>\n"
455    + "</dbReference>\n"
456    + "<dbReference type=\"Proteomes\" id=\"UP000314985\">\n"
457    + "<property type=\"component\" value=\"Unplaced\"/>\n"
458    + "</dbReference>\n"
459    + "<dbReference type=\"Proteomes\" id=\"UP000694570\">\n"
460    + "<property type=\"component\" value=\"Unplaced\"/>\n"
461    + "</dbReference>\n"
462    + "<dbReference type=\"Proteomes\" id=\"UP000694571\">\n"
463    + "<property type=\"component\" value=\"Unplaced\"/>\n"
464    + "</dbReference>\n"
465    + "<dbReference type=\"Proteomes\" id=\"UP000694720\">\n"
466    + "<property type=\"component\" value=\"Unplaced\"/>\n"
467    + "</dbReference>\n"
468    + "<dbReference type=\"Proteomes\" id=\"UP000694722\">\n"
469    + "<property type=\"component\" value=\"Unplaced\"/>\n"
470    + "</dbReference>\n"
471    + "<dbReference type=\"Proteomes\" id=\"UP000694723\">\n"
472    + "<property type=\"component\" value=\"Unplaced\"/>\n"
473    + "</dbReference>\n"
474    + "<dbReference type=\"Proteomes\" id=\"UP000694724\">\n"
475    + "<property type=\"component\" value=\"Unplaced\"/>\n"
476    + "</dbReference>\n"
477    + "<dbReference type=\"Proteomes\" id=\"UP000694725\">\n"
478    + "<property type=\"component\" value=\"Unplaced\"/>\n"
479    + "</dbReference>\n"
480    + "<dbReference type=\"Proteomes\" id=\"UP000694726\">\n"
481    + "<property type=\"component\" value=\"Unplaced\"/>\n"
482    + "</dbReference>\n"
483    + "<dbReference type=\"Proteomes\" id=\"UP000694727\">\n"
484    + "<property type=\"component\" value=\"Unplaced\"/>\n"
485    + "</dbReference>\n"
486    + "<dbReference type=\"Proteomes\" id=\"UP000694728\">\n"
487    + "<property type=\"component\" value=\"Unplaced\"/>\n"
488    + "</dbReference>\n"
489    + "<dbReference type=\"GO\" id=\"GO:0005615\">\n"
490    + "<property type=\"term\" value=\"C:extracellular space\"/>\n"
491    + "<property type=\"evidence\" value=\"ECO:0007669\"/>\n"
492    + "<property type=\"project\" value=\"UniProtKB-SubCell\"/>\n"
493    + "</dbReference>\n"
494    + "<dbReference type=\"GO\" id=\"GO:0004190\">\n"
495    + "<property type=\"term\" value=\"F:aspartic-type endopeptidase activity\"/>\n"
496    + "<property type=\"evidence\" value=\"ECO:0000318\"/>\n"
497    + "<property type=\"project\" value=\"GO_Central\"/>\n"
498    + "</dbReference>\n"
499    + "<dbReference type=\"GO\" id=\"GO:0006508\">\n"
500    + "<property type=\"term\" value=\"P:proteolysis\"/>\n"
501    + "<property type=\"evidence\" value=\"ECO:0000318\"/>\n"
502    + "<property type=\"project\" value=\"GO_Central\"/>\n"
503    + "</dbReference>\n"
504    + "<dbReference type=\"Gene3D\" id=\"6.10.140.60\">\n"
505    + "<property type=\"match status\" value=\"1\"/>\n"
506    + "</dbReference>\n"
507    + "<dbReference type=\"Gene3D\" id=\"2.40.70.10\">\n"
508    + "<property type=\"entry name\" value=\"Acid Proteases\"/>\n"
509    + "<property type=\"match status\" value=\"3\"/>\n"
510    + "</dbReference>\n"
511    + "<dbReference type=\"InterPro\" id=\"IPR001461\">\n"
512    + "<property type=\"entry name\" value=\"Aspartic_peptidase_A1\"/>\n"
513    + "</dbReference>\n"
514    + "<dbReference type=\"InterPro\" id=\"IPR001969\">\n"
515    + "<property type=\"entry name\" value=\"Aspartic_peptidase_AS\"/>\n"
516    + "</dbReference>\n"
517    + "<dbReference type=\"InterPro\" id=\"IPR012848\">\n"
518    + "<property type=\"entry name\" value=\"Aspartic_peptidase_N\"/>\n"
519    + "</dbReference>\n"
520    + "<dbReference type=\"InterPro\" id=\"IPR033121\">\n"
521    + "<property type=\"entry name\" value=\"PEPTIDASE_A1\"/>\n"
522    + "</dbReference>\n"
523    + "<dbReference type=\"InterPro\" id=\"IPR021109\">\n"
524    + "<property type=\"entry name\" value=\"Peptidase_aspartic_dom_sf\"/>\n"
525    + "</dbReference>\n"
526    + "<dbReference type=\"PANTHER\" id=\"PTHR47966\">\n"
527    + "<property type=\"entry name\" value=\"BETA-SITE APP-CLEAVING ENZYME, ISOFORM A-RELATED\"/>\n"
528    + "<property type=\"match status\" value=\"1\"/>\n"
529    + "</dbReference>\n"
530    + "<dbReference type=\"PANTHER\" id=\"PTHR47966:SF49\">\n"
531    + "<property type=\"entry name\" value=\"PEPSIN A-5\"/>\n"
532    + "<property type=\"match status\" value=\"1\"/>\n"
533    + "</dbReference>\n"
534    + "<dbReference type=\"Pfam\" id=\"PF07966\">\n"
535    + "<property type=\"entry name\" value=\"A1_Propeptide\"/>\n"
536    + "<property type=\"match status\" value=\"1\"/>\n"
537    + "</dbReference>\n"
538    + "<dbReference type=\"Pfam\" id=\"PF00026\">\n"
539    + "<property type=\"entry name\" value=\"Asp\"/>\n"
540    + "<property type=\"match status\" value=\"2\"/>\n"
541    + "</dbReference>\n"
542    + "<dbReference type=\"PRINTS\" id=\"PR00792\">\n"
543    + "<property type=\"entry name\" value=\"PEPSIN\"/>\n"
544    + "</dbReference>\n"
545    + "<dbReference type=\"SUPFAM\" id=\"SSF50630\">\n"
546    + "<property type=\"entry name\" value=\"Acid proteases\"/>\n"
547    + "<property type=\"match status\" value=\"2\"/>\n"
548    + "</dbReference>\n"
549    + "<dbReference type=\"PROSITE\" id=\"PS00141\">\n"
550    + "<property type=\"entry name\" value=\"ASP_PROTEASE\"/>\n"
551    + "<property type=\"match status\" value=\"2\"/>\n"
552    + "</dbReference>\n"
553    + "<dbReference type=\"PROSITE\" id=\"PS51767\">\n"
554    + "<property type=\"entry name\" value=\"PEPTIDASE_A1\"/>\n"
555    + "<property type=\"match status\" value=\"1\"/>\n"
556    + "</dbReference>\n"
557    + "<proteinExistence type=\"evidence at transcript level\"/>\n"
558    + "<keyword id=\"KW-0064\">Aspartyl protease</keyword>\n"
559    + "<keyword id=\"KW-1015\">Disulfide bond</keyword>\n"
560    + "<keyword id=\"KW-0325\">Glycoprotein</keyword>\n"
561    + "<keyword id=\"KW-0378\">Hydrolase</keyword>\n"
562    + "<keyword id=\"KW-0645\">Protease</keyword>\n"
563    + "<keyword id=\"KW-1185\">Reference proteome</keyword>\n"
564    + "<keyword id=\"KW-0964\">Secreted</keyword>\n"
565    + "<keyword id=\"KW-0732\">Signal</keyword>\n"
566    + "<keyword id=\"KW-0865\">Zymogen</keyword>\n"
567    + "<feature type=\"signal peptide\" evidence=\"2\">\n"
568    + "<location>\n" + "<begin position=\"1\"/>\n"
569    + "<end position=\"15\"/>\n" + "</location>\n"
570    + "</feature>\n"
571    + "<feature type=\"propeptide\" id=\"PRO_0000026107\" description=\"Activation peptide\" evidence=\"2\">\n"
572    + "<location>\n" + "<begin position=\"16\"/>\n"
573    + "<end status=\"unknown\"/>\n" + "</location>\n"
574    + "</feature>\n"
575    + "<feature type=\"chain\" id=\"PRO_0000026108\" description=\"Pregnancy-associated glycoprotein 2\">\n"
576    + "<location>\n" + "<begin status=\"unknown\"/>\n"
577    + "<end position=\"420\"/>\n" + "</location>\n"
578    + "</feature>\n"
579    + "<feature type=\"domain\" description=\"Peptidase A1\" evidence=\"3\">\n"
580    + "<location>\n" + "<begin position=\"76\"/>\n"
581    + "<end position=\"417\"/>\n" + "</location>\n"
582    + "</feature>\n"
583    + "<feature type=\"active site\" evidence=\"4\">\n"
584    + "<location>\n" + "<position position=\"94\"/>\n"
585    + "</location>\n" + "</feature>\n"
586    + "<feature type=\"active site\" evidence=\"4\">\n"
587    + "<location>\n" + "<position position=\"277\"/>\n"
588    + "</location>\n" + "</feature>\n"
589    + "<feature type=\"glycosylation site\" description=\"N-linked (GlcNAc...) asparagine\" evidence=\"2\">\n"
590    + "<location>\n" + "<position position=\"56\"/>\n"
591    + "</location>\n" + "</feature>\n"
592    + "<feature type=\"glycosylation site\" description=\"N-linked (GlcNAc...) asparagine\" evidence=\"2\">\n"
593    + "<location>\n" + "<position position=\"79\"/>\n"
594    + "</location>\n" + "</feature>\n"
595    + "<feature type=\"disulfide bond\" evidence=\"1\">\n"
596    + "<location>\n" + "<begin position=\"107\"/>\n"
597    + "<end position=\"112\"/>\n" + "</location>\n"
598    + "</feature>\n"
599    + "<feature type=\"disulfide bond\" evidence=\"1\">\n"
600    + "<location>\n" + "<begin position=\"268\"/>\n"
601    + "<end position=\"272\"/>\n" + "</location>\n"
602    + "</feature>\n"
603    + "<feature type=\"disulfide bond\" evidence=\"1\">\n"
604    + "<location>\n" + "<begin position=\"341\"/>\n"
605    + "<end position=\"376\"/>\n" + "</location>\n"
606    + "</feature>\n"
607    + "<feature type=\"sequence conflict\" description=\"In Ref. 1.\" evidence=\"5\" ref=\"1\">\n"
608    + "<location>\n" + "<begin position=\"335\"/>\n"
609    + "<end position=\"367\"/>\n" + "</location>\n"
610    + "</feature>\n"
611    + "<evidence type=\"ECO:0000250\" key=\"1\"/>\n"
612    + "<evidence type=\"ECO:0000255\" key=\"2\"/>\n"
613    + "<evidence type=\"ECO:0000255\" key=\"3\">\n"
614    + "<source>\n"
615    + "<dbReference type=\"PROSITE-ProRule\" id=\"PRU01103\"/>\n"
616    + "</source>\n" + "</evidence>\n"
617    + "<evidence type=\"ECO:0000255\" key=\"4\">\n"
618    + "<source>\n"
619    + "<dbReference type=\"PROSITE-ProRule\" id=\"PRU10094\"/>\n"
620    + "</source>\n" + "</evidence>\n"
621    + "<evidence type=\"ECO:0000305\" key=\"5\"/>\n"
622    + "<sequence length=\"420\" mass=\"47132\" checksum=\"094153B6C1B1FCDB\" modified=\"1997-11-01\" version=\"1\" precursor=\"true\">MKWLVILGLVALSDCLVMIPLTKVKSVRESLREKGLLKNFLKEHPYNMIQNLLSKNSSHVQKFSYQPLRNYLDMVYVGNISIGTPPQQFSVVFDTGSSDLWVPSIYCKSKACVTHRSFNPSHSSTFHDRGKSIKLEYGSGKMSGFLGQDTVRIGQLTSTGQAFGLSKEETGKAFEHAIFDGILGLAYPSIAIKGTTTVIDNLKKQDQISEPVFAFYLSSDKEEGSVVMFGGVDKKYYKGDLKWVPLTQTSYWQIALDRITCRGRVIGCPRGCQAIVDTGTSMLHGPSKAVAKIHSLIKHFEKEYVVPCNARKALPDIVFTINNVDYPVPAQAYIRKYVVPCNARKALPDIVFTINNVDYPVPAQAYIRKNANNNRCYSTFEDIMDTLNQREIWILGDVFLRLYFTVYDEGQNRIGLAQAT</sequence>\n"
623    + "</entry>\n"
624    + "<copyright> Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms Distributed under the Creative Commons Attribution (CC BY 4.0) License </copyright>\n"
625    + "</uniprot>");
626   
 
627  0 toggle @DataProvider
628    public Object[][] problemEntries()
629    {
630  0 return new Object[][] { new Object[] { Q29079 } };
631    }
632   
 
633  1 toggle @Test(groups = "Functional", dataProvider = "problemEntries")
634    public SequenceI testimportOfProblemEntries(String entry)
635    {
636  1 Uniprot u = new Uniprot();
637  1 InputStream is = new ByteArrayInputStream(entry.getBytes());
638  1 List<Entry> entries = u.getUniprotEntries(is);
639  1 assertEquals(1, entries.size());
640  1 SequenceI sq = u.uniprotEntryToSequence(entries.get(0));
641  1 assertNotNull(sq);
642  1 return sq;
643    }
644   
 
645  1 toggle @Test(groups = "Functional")
646    public void checkIndefiniteSequenceFeatures()
647    {
648  1 SequenceI upseq = testimportOfProblemEntries(Q29079);
649  1 List<SequenceFeature> sf = upseq.getFeatures()
650    .getPositionalFeatures("chain");
651  1 assertNotNull(sf);
652  1 assertTrue(sf.size() == 1);
653  1 SequenceFeature chainFeaure = sf.get(0);
654  1 assertTrue(chainFeaure.getBegin() == 1);
655  1 assertTrue(chainFeaure.getEnd() == upseq.getEnd());
656  1 assertNotNull(chainFeaure.getValueAsString("start_status"));
657  1 assertNull(chainFeaure.getValueAsString("end_status"));
658  1 assertTrue(
659    "unknown".equals(chainFeaure.getValueAsString("start_status")));
660    }
661    }