Clover icon

jalviewX

  1. Project Clover database Wed Oct 31 2018 15:13:58 GMT
  2. Package jalview.ws.sifts

File SiftsClient.java

 

Coverage histogram

../../../img/srcFileCovDistChart3.png
47% of files have more coverage

Code metrics

136
404
49
5
1,262
936
144
0.36
8.24
9.8
2.94

Classes

Class Line # Actions
SiftsClient 77 381 126 423
0.22810219 22.8%
SiftsClient.CoordinateSys 123 2 2 2
0.5 50%
SiftsClient.ResidueDetailType 139 2 2 4
0.0 0%
SiftsClient.SiftsEntitySortPojo 987 1 1 0
1.0 100%
SiftsClient.SegmentHelperPojo 1005 18 13 31
0.0 0%
 

Contributing tests

No tests hitting this source file were found.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.ws.sifts;
22   
23    import jalview.analysis.AlignSeq;
24    import jalview.analysis.scoremodels.ScoreMatrix;
25    import jalview.analysis.scoremodels.ScoreModels;
26    import jalview.api.DBRefEntryI;
27    import jalview.api.SiftsClientI;
28    import jalview.datamodel.DBRefEntry;
29    import jalview.datamodel.DBRefSource;
30    import jalview.datamodel.SequenceI;
31    import jalview.io.StructureFile;
32    import jalview.schemes.ResidueProperties;
33    import jalview.structure.StructureMapping;
34    import jalview.util.Comparison;
35    import jalview.util.DBRefUtils;
36    import jalview.util.Format;
37    import jalview.xml.binding.sifts.Entry;
38    import jalview.xml.binding.sifts.Entry.Entity;
39    import jalview.xml.binding.sifts.Entry.Entity.Segment;
40    import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion;
41    import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue;
42    import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb;
43    import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail;
44   
45    import java.io.File;
46    import java.io.FileInputStream;
47    import java.io.FileOutputStream;
48    import java.io.IOException;
49    import java.io.InputStream;
50    import java.io.PrintStream;
51    import java.net.URL;
52    import java.net.URLConnection;
53    import java.nio.file.Files;
54    import java.nio.file.Path;
55    import java.nio.file.attribute.BasicFileAttributes;
56    import java.util.ArrayList;
57    import java.util.Arrays;
58    import java.util.Collection;
59    import java.util.Collections;
60    import java.util.Date;
61    import java.util.HashMap;
62    import java.util.HashSet;
63    import java.util.List;
64    import java.util.Map;
65    import java.util.Set;
66    import java.util.TreeMap;
67    import java.util.zip.GZIPInputStream;
68   
69    import javax.xml.bind.JAXBContext;
70    import javax.xml.bind.Unmarshaller;
71    import javax.xml.stream.XMLInputFactory;
72    import javax.xml.stream.XMLStreamReader;
73   
74    import mc_view.Atom;
75    import mc_view.PDBChain;
76   
 
77    public class SiftsClient implements SiftsClientI
78    {
79    /*
80    * for use in mocking out file fetch for tests only
81    * - reset to null after testing!
82    */
83    private static File mockSiftsFile;
84   
85    private Entry siftsEntry;
86   
87    private StructureFile pdb;
88   
89    private String pdbId;
90   
91    private String structId;
92   
93    private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT;
94   
95    /**
96    * PDB sequence position to sequence coordinate mapping, as derived from the
97    * SIFTS record for the identified SeqCoordSys. Used for lift-over from the
98    * sequence derived from PDB (with first extracted PDBRESNUM as 'start') to
99    * the sequence being annotated with PDB data.
100    */
101    private jalview.datamodel.Mapping seqFromPdbMapping;
102   
103    private static final int BUFFER_SIZE = 4096;
104   
105    public static final int UNASSIGNED = Integer.MIN_VALUE;
106   
107    private static final int PDB_RES_POS = 0;
108   
109    private static final int PDB_ATOM_POS = 1;
110   
111    private static final int PDBE_POS = 2;
112   
113    private static final String NOT_OBSERVED = "Not_Observed";
114   
115    private static final String SIFTS_FTP_BASE_URL = "http://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/";
116   
117    private final static String NEWLINE = System.lineSeparator();
118   
119    private String curSourceDBRef;
120   
121    private HashSet<String> curDBRefAccessionIdsString;
122   
 
123    private enum CoordinateSys
124    {
125    UNIPROT("UniProt"), PDB("PDBresnum"), PDBe("PDBe");
126    private String name;
127   
 
128  3 toggle private CoordinateSys(String name)
129    {
130  3 this.name = name;
131    }
132   
 
133  0 toggle public String getName()
134    {
135  0 return name;
136    }
137    };
138   
 
139    private enum ResidueDetailType
140    {
141    NAME_SEC_STRUCTURE("nameSecondaryStructure"),
142    CODE_SEC_STRUCTURE("codeSecondaryStructure"), ANNOTATION("Annotation");
143    private String code;
144   
 
145  0 toggle private ResidueDetailType(String code)
146    {
147  0 this.code = code;
148    }
149   
 
150  0 toggle public String getCode()
151    {
152  0 return code;
153    }
154    };
155   
/**
 * Creates a client for the given structure file: the SIFTS file for the
 * structure's id is obtained and parsed into this instance.
 *
 * @param pdb
 *          the structure file whose id selects the SIFTS file
 * @throws SiftsException
 *           if the SIFTS file could not be obtained or parsed
 */
public SiftsClient(StructureFile pdb) throws SiftsException
{
  this.pdb = pdb;
  pdbId = pdb.getId();
  siftsEntry = parseSIFTs(getSiftsFile(pdbId));
}
170   
171    /**
172    * Parse the given SIFTs File and return a JAXB POJO of parsed data
173    *
174    * @param siftFile
175    * - the GZipped SIFTs XML file to parse
176    * @return
177    * @throws Exception
178    * if a problem occurs while parsing the SIFTs XML
179    */
 
180  10 toggle private Entry parseSIFTs(File siftFile) throws SiftsException
181    {
182  10 try (InputStream in = new FileInputStream(siftFile);
183  10 GZIPInputStream gzis = new GZIPInputStream(in);)
184    {
185    // System.out.println("File : " + siftFile.getAbsolutePath());
186  10 JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts");
187  10 XMLStreamReader streamReader = XMLInputFactory.newInstance()
188    .createXMLStreamReader(gzis);
189  10 Unmarshaller um = jc.createUnmarshaller();
190  10 return (Entry) um.unmarshal(streamReader);
191    } catch (Exception e)
192    {
193  0 e.printStackTrace();
194  0 throw new SiftsException(e.getMessage());
195    }
196    }
197   
198    /**
199    * Get a SIFTs XML file for a given PDB Id from Cache or download from FTP
200    * repository if not found in cache
201    *
202    * @param pdbId
203    * @return SIFTs XML file
204    * @throws SiftsException
205    */
 
206  10 toggle public static File getSiftsFile(String pdbId) throws SiftsException
207    {
208    /*
209    * return mocked file if it has been set
210    */
211  10 if (mockSiftsFile != null)
212    {
213  0 return mockSiftsFile;
214    }
215   
216  10 String siftsFileName = SiftsSettings.getSiftDownloadDirectory()
217    + pdbId.toLowerCase() + ".xml.gz";
218  10 File siftsFile = new File(siftsFileName);
219  10 if (siftsFile.exists())
220    {
221    // The line below is required for unit testing... don't comment it out!!!
222  10 System.out.println(">>> SIFTS File already downloaded for " + pdbId);
223   
224  10 if (isFileOlderThanThreshold(siftsFile,
225    SiftsSettings.getCacheThresholdInDays()))
226    {
227  4 File oldSiftsFile = new File(siftsFileName + "_old");
228  4 siftsFile.renameTo(oldSiftsFile);
229  4 try
230    {
231  4 siftsFile = downloadSiftsFile(pdbId.toLowerCase());
232  4 oldSiftsFile.delete();
233  4 return siftsFile;
234    } catch (IOException e)
235    {
236  0 e.printStackTrace();
237  0 oldSiftsFile.renameTo(siftsFile);
238  0 return new File(siftsFileName);
239    }
240    }
241    else
242    {
243  6 return siftsFile;
244    }
245    }
246  0 try
247    {
248  0 siftsFile = downloadSiftsFile(pdbId.toLowerCase());
249    } catch (IOException e)
250    {
251  0 throw new SiftsException(e.getMessage());
252    }
253  0 return siftsFile;
254    }
255   
256    /**
257    * This method enables checking if a cached file has exceeded a certain
258    * threshold(in days)
259    *
260    * @param file
261    * the cached file
262    * @param noOfDays
263    * the threshold in days
264    * @return
265    */
 
266  10 toggle public static boolean isFileOlderThanThreshold(File file, int noOfDays)
267    {
268  10 Path filePath = file.toPath();
269  10 BasicFileAttributes attr;
270  10 int diffInDays = 0;
271  10 try
272    {
273  10 attr = Files.readAttributes(filePath, BasicFileAttributes.class);
274  10 diffInDays = (int) ((new Date().getTime()
275    - attr.lastModifiedTime().toMillis())
276    / (1000 * 60 * 60 * 24));
277    // System.out.println("Diff in days : " + diffInDays);
278    } catch (IOException e)
279    {
280  0 e.printStackTrace();
281    }
282  10 return noOfDays <= diffInDays;
283    }
284   
285    /**
286    * Download a SIFTs XML file for a given PDB Id from an FTP repository
287    *
288    * @param pdbId
289    * @return downloaded SIFTs XML file
290    * @throws SiftsException
291    * @throws IOException
292    */
 
293  4 toggle public static File downloadSiftsFile(String pdbId)
294    throws SiftsException, IOException
295    {
296  4 if (pdbId.contains(".cif"))
297    {
298  0 pdbId = pdbId.replace(".cif", "");
299    }
300  4 String siftFile = pdbId + ".xml.gz";
301  4 String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile;
302  4 String downloadedSiftsFile = SiftsSettings.getSiftDownloadDirectory()
303    + siftFile;
304  4 File siftsDownloadDir = new File(
305    SiftsSettings.getSiftDownloadDirectory());
306  4 if (!siftsDownloadDir.exists())
307    {
308  0 siftsDownloadDir.mkdirs();
309    }
310    // System.out.println(">> Download ftp url : " + siftsFileFTPURL);
311    // long now = System.currentTimeMillis();
312  4 URL url = new URL(siftsFileFTPURL);
313  4 URLConnection conn = url.openConnection();
314  4 InputStream inputStream = conn.getInputStream();
315  4 FileOutputStream outputStream = new FileOutputStream(
316    downloadedSiftsFile);
317  4 byte[] buffer = new byte[BUFFER_SIZE];
318  4 int bytesRead = -1;
319  ? while ((bytesRead = inputStream.read(buffer)) != -1)
320    {
321  40 outputStream.write(buffer, 0, bytesRead);
322    }
323  4 outputStream.close();
324  4 inputStream.close();
325    // System.out.println(">>> File downloaded : " + downloadedSiftsFile
326    // + " took " + (System.currentTimeMillis() - now) + "ms");
327  4 return new File(downloadedSiftsFile);
328    }
329   
330    /**
331    * Delete the SIFTs file for the given PDB Id in the local SIFTs download
332    * directory
333    *
334    * @param pdbId
335    * @return true if the file was deleted or doesn't exist
336    */
 
337  0 toggle public static boolean deleteSiftsFileByPDBId(String pdbId)
338    {
339  0 File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory()
340    + pdbId.toLowerCase() + ".xml.gz");
341  0 if (siftsFile.exists())
342    {
343  0 return siftsFile.delete();
344    }
345  0 return true;
346    }
347   
348    /**
349    * Get a valid SIFTs DBRef for the given sequence current SIFTs entry
350    *
351    * @param seq
352    * - the target sequence for the operation
353    * @return a valid DBRefEntry that is SIFTs compatible
354    * @throws Exception
355    * if no valid source DBRefEntry was found for the given sequences
356    */
 
357  12 toggle public DBRefEntryI getValidSourceDBRef(SequenceI seq)
358    throws SiftsException
359    {
360  12 List<DBRefEntry> dbRefs = seq.getPrimaryDBRefs();
361  12 if (dbRefs == null || dbRefs.size() < 1)
362    {
363  12 throw new SiftsException(
364    "Source DBRef could not be determined. DBRefs might not have been retrieved.");
365    }
366   
367  0 for (DBRefEntry dbRef : dbRefs)
368    {
369  0 if (dbRef == null || dbRef.getAccessionId() == null
370    || dbRef.getSource() == null)
371    {
372  0 continue;
373    }
374  0 String canonicalSource = DBRefUtils
375    .getCanonicalName(dbRef.getSource());
376  0 if (isValidDBRefEntry(dbRef)
377    && (canonicalSource.equalsIgnoreCase(DBRefSource.UNIPROT)
378    || canonicalSource.equalsIgnoreCase(DBRefSource.PDB)))
379    {
380  0 return dbRef;
381    }
382    }
383  0 throw new SiftsException("Could not get source DB Ref");
384    }
385   
386    /**
387    * Check that the DBRef Entry is properly populated and is available in this
388    * SiftClient instance
389    *
390    * @param entry
391    * - DBRefEntry to validate
392    * @return true validation is successful otherwise false is returned.
393    */
 
394  0 toggle boolean isValidDBRefEntry(DBRefEntryI entry)
395    {
396  0 return entry != null && entry.getAccessionId() != null
397    && isFoundInSiftsEntry(entry.getAccessionId());
398    }
399   
 
400  0 toggle @Override
401    public HashSet<String> getAllMappingAccession()
402    {
403  0 HashSet<String> accessions = new HashSet<String>();
404  0 List<Entity> entities = siftsEntry.getEntity();
405  0 for (Entity entity : entities)
406    {
407  0 List<Segment> segments = entity.getSegment();
408  0 for (Segment segment : segments)
409    {
410  0 List<MapRegion> mapRegions = segment.getListMapRegion()
411    .getMapRegion();
412  0 for (MapRegion mapRegion : mapRegions)
413    {
414  0 accessions
415    .add(mapRegion.getDb().getDbAccessionId().toLowerCase());
416    }
417    }
418    }
419  0 return accessions;
420    }
421   
 
422  12 toggle @Override
423    public StructureMapping getSiftsStructureMapping(SequenceI seq,
424    String pdbFile, String chain) throws SiftsException
425    {
426  12 SequenceI aseq = seq;
427  24 while (seq.getDatasetSequence() != null)
428    {
429  12 seq = seq.getDatasetSequence();
430    }
431  12 structId = (chain == null) ? pdbId : pdbId + "|" + chain;
432  12 System.out.println("Getting SIFTS mapping for " + structId + ": seq "
433    + seq.getName());
434   
435  12 final StringBuilder mappingDetails = new StringBuilder(128);
436  12 PrintStream ps = new PrintStream(System.out)
437    {
 
438  0 toggle @Override
439    public void print(String x)
440    {
441  0 mappingDetails.append(x);
442    }
443   
 
444  0 toggle @Override
445    public void println()
446    {
447  0 mappingDetails.append(NEWLINE);
448    }
449    };
450  12 HashMap<Integer, int[]> mapping = getGreedyMapping(chain, seq, ps);
451   
452  0 String mappingOutput = mappingDetails.toString();
453  0 StructureMapping siftsMapping = new StructureMapping(aseq, pdbFile,
454    pdbId, chain, mapping, mappingOutput, seqFromPdbMapping);
455   
456  0 return siftsMapping;
457    }
458   
 
459  12 toggle @Override
460    public HashMap<Integer, int[]> getGreedyMapping(String entityId,
461    SequenceI seq, java.io.PrintStream os) throws SiftsException
462    {
463  12 List<Integer> omitNonObserved = new ArrayList<>();
464  12 int nonObservedShiftIndex = 0,pdbeNonObserved=0;
465    // System.out.println("Generating mappings for : " + entityId);
466  12 Entity entity = null;
467  12 entity = getEntityById(entityId);
468  12 String originalSeq = AlignSeq.extractGaps(
469    jalview.util.Comparison.GapChars, seq.getSequenceAsString());
470  12 HashMap<Integer, int[]> mapping = new HashMap<Integer, int[]>();
471  12 DBRefEntryI sourceDBRef;
472  12 sourceDBRef = getValidSourceDBRef(seq);
473    // TODO ensure sequence start/end is in the same coordinate system and
474    // consistent with the choosen sourceDBRef
475   
476    // set sequence coordinate system - default value is UniProt
477  0 if (sourceDBRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
478    {
479  0 seqCoordSys = CoordinateSys.PDB;
480    }
481   
482  0 HashSet<String> dbRefAccessionIdsString = new HashSet<String>();
483  0 for (DBRefEntry dbref : seq.getDBRefs())
484    {
485  0 dbRefAccessionIdsString.add(dbref.getAccessionId().toLowerCase());
486    }
487  0 dbRefAccessionIdsString.add(sourceDBRef.getAccessionId().toLowerCase());
488   
489  0 curDBRefAccessionIdsString = dbRefAccessionIdsString;
490  0 curSourceDBRef = sourceDBRef.getAccessionId();
491   
492  0 TreeMap<Integer, String> resNumMap = new TreeMap<Integer, String>();
493  0 List<Segment> segments = entity.getSegment();
494  0 SegmentHelperPojo shp = new SegmentHelperPojo(seq, mapping, resNumMap,
495    omitNonObserved, nonObservedShiftIndex,pdbeNonObserved);
496  0 processSegments(segments, shp);
497  0 try
498    {
499  0 populateAtomPositions(entityId, mapping);
500    } catch (Exception e)
501    {
502  0 e.printStackTrace();
503    }
504  0 if (seqCoordSys == CoordinateSys.UNIPROT)
505    {
506  0 padWithGaps(resNumMap, omitNonObserved);
507    }
508  0 int seqStart = UNASSIGNED;
509  0 int seqEnd = UNASSIGNED;
510  0 int pdbStart = UNASSIGNED;
511  0 int pdbEnd = UNASSIGNED;
512   
513  0 if (mapping.isEmpty())
514    {
515  0 throw new SiftsException("SIFTS mapping failed");
516    }
517    // also construct a mapping object between the seq-coord sys and the PDB seq's coord sys
518   
519  0 Integer[] keys = mapping.keySet().toArray(new Integer[0]);
520  0 Arrays.sort(keys);
521  0 seqStart = keys[0];
522  0 seqEnd = keys[keys.length - 1];
523  0 List<int[]> from=new ArrayList<>(),to=new ArrayList<>();
524  0 int[]_cfrom=null,_cto=null;
525  0 String matchedSeq = originalSeq;
526  0 if (seqStart != UNASSIGNED) // fixme! seqStart can map to -1 for a pdb sequence that starts <-1
527    {
528  0 for (int seqps:keys)
529    {
530  0 int pdbpos = mapping.get(seqps)[PDBE_POS];
531  0 if (pdbpos == UNASSIGNED)
532    {
533    // not correct - pdbpos might be -1, but leave it for now
534  0 continue;
535    }
536  0 if (_cfrom==null || seqps!=_cfrom[1]+1)
537    {
538  0 _cfrom = new int[] { seqps,seqps};
539  0 from.add(_cfrom);
540  0 _cto = null; // discontinuity
541    } else {
542  0 _cfrom[1]= seqps;
543    }
544  0 if (_cto==null || pdbpos!=1+_cto[1])
545    {
546  0 _cto = new int[] { pdbpos,pdbpos};
547  0 to.add(_cto);
548    } else {
549  0 _cto[1] = pdbpos;
550    }
551    }
552  0 _cfrom = new int[from.size() * 2];
553  0 _cto = new int[to.size() * 2];
554  0 int p = 0;
555  0 for (int[] range : from)
556    {
557  0 _cfrom[p++] = range[0];
558  0 _cfrom[p++] = range[1];
559    }
560  0 ;
561  0 p = 0;
562  0 for (int[] range : to)
563    {
564  0 _cto[p++] = range[0];
565  0 _cto[p++] = range[1];
566    }
567  0 ;
568   
569  0 seqFromPdbMapping = new jalview.datamodel.Mapping(null, _cto, _cfrom,
570    1,
571    1);
572  0 pdbStart = mapping.get(seqStart)[PDB_RES_POS];
573  0 pdbEnd = mapping.get(seqEnd)[PDB_RES_POS];
574  0 int orignalSeqStart = seq.getStart();
575  0 if (orignalSeqStart >= 1)
576    {
577  0 int subSeqStart = (seqStart >= orignalSeqStart)
578    ? seqStart - orignalSeqStart
579    : 0;
580  0 int subSeqEnd = seqEnd - (orignalSeqStart - 1);
581  0 subSeqEnd = originalSeq.length() < subSeqEnd ? originalSeq.length()
582    : subSeqEnd;
583  0 matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd);
584    }
585    else
586    {
587  0 matchedSeq = originalSeq.substring(1, originalSeq.length());
588    }
589    }
590   
591  0 StringBuilder targetStrucSeqs = new StringBuilder();
592  0 for (String res : resNumMap.values())
593    {
594  0 targetStrucSeqs.append(res);
595    }
596   
597  0 if (os != null)
598    {
599  0 MappingOutputPojo mop = new MappingOutputPojo();
600  0 mop.setSeqStart(seqStart);
601  0 mop.setSeqEnd(seqEnd);
602  0 mop.setSeqName(seq.getName());
603  0 mop.setSeqResidue(matchedSeq);
604   
605  0 mop.setStrStart(pdbStart);
606  0 mop.setStrEnd(pdbEnd);
607  0 mop.setStrName(structId);
608  0 mop.setStrResidue(targetStrucSeqs.toString());
609   
610  0 mop.setType("pep");
611  0 os.print(getMappingOutput(mop).toString());
612  0 os.println();
613    }
614  0 return mapping;
615    }
616   
 
617  0 toggle void processSegments(List<Segment> segments, SegmentHelperPojo shp)
618    {
619  0 SequenceI seq = shp.getSeq();
620  0 HashMap<Integer, int[]> mapping = shp.getMapping();
621  0 TreeMap<Integer, String> resNumMap = shp.getResNumMap();
622  0 List<Integer> omitNonObserved = shp.getOmitNonObserved();
623  0 int nonObservedShiftIndex = shp.getNonObservedShiftIndex();
624  0 int pdbeNonObservedCount = shp.getPdbeNonObserved();
625  0 int firstPDBResNum = UNASSIGNED;
626  0 for (Segment segment : segments)
627    {
628    // System.out.println("Mapping segments : " + segment.getSegId() + "\\"s
629    // + segStartEnd);
630  0 List<Residue> residues = segment.getListResidue().getResidue();
631  0 for (Residue residue : residues)
632    {
633  0 boolean isObserved = isResidueObserved(residue);
634  0 int pdbeIndex = getLeadingIntegerValue(residue.getDbResNum(),
635    UNASSIGNED);
636  0 int currSeqIndex = UNASSIGNED;
637  0 List<CrossRefDb> cRefDbs = residue.getCrossRefDb();
638  0 CrossRefDb pdbRefDb = null;
639  0 for (CrossRefDb cRefDb : cRefDbs)
640    {
641  0 if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB))
642    {
643  0 pdbRefDb = cRefDb;
644  0 if (firstPDBResNum == UNASSIGNED)
645    {
646  0 firstPDBResNum = getLeadingIntegerValue(cRefDb.getDbResNum(),
647    UNASSIGNED);
648    }
649    else
650    {
651  0 if (isObserved)
652    {
653    // after we find the first observed residue we just increment
654  0 firstPDBResNum++;
655    }
656    }
657    }
658  0 if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys.getName())
659    && isAccessionMatched(cRefDb.getDbAccessionId()))
660    {
661  0 currSeqIndex = getLeadingIntegerValue(cRefDb.getDbResNum(),
662    UNASSIGNED);
663  0 if (pdbRefDb != null)
664    {
665  0 break;// exit loop if pdb and uniprot are already found
666    }
667    }
668    }
669  0 if (!isObserved)
670    {
671  0 ++pdbeNonObservedCount;
672    }
673  0 if (seqCoordSys == seqCoordSys.PDB) // FIXME: is seqCoordSys ever PDBe
674    // ???
675    {
676    // if the sequence has a primary reference to the PDB, then we are
677    // dealing with a sequence extracted directly from the PDB. In that
678    // case, numbering is PDBe - non-observed residues
679  0 currSeqIndex = seq.getStart() - 1 + pdbeIndex;
680    }
681  0 if (!isObserved)
682    {
683  0 if (seqCoordSys != CoordinateSys.UNIPROT) // FIXME: PDB or PDBe only
684    // here
685    {
686    // mapping to PDB or PDBe so we need to bookkeep for the
687    // non-observed
688    // SEQRES positions
689  0 omitNonObserved.add(currSeqIndex);
690  0 ++nonObservedShiftIndex;
691    }
692    }
693  0 if (currSeqIndex == UNASSIGNED)
694    {
695    // change in logic - unobserved residues with no currSeqIndex
696    // corresponding are still counted in both nonObservedShiftIndex and
697    // pdbeIndex...
698  0 continue;
699    }
700    // if (currSeqIndex >= seq.getStart() && currSeqIndex <= seqlength) //
701    // true
702    // numbering
703    // is
704    // not
705    // up
706    // to
707    // seq.getEnd()
708    {
709   
710  0 int resNum = (pdbRefDb == null)
711    ? getLeadingIntegerValue(residue.getDbResNum(),
712    UNASSIGNED)
713    : getLeadingIntegerValue(pdbRefDb.getDbResNum(),
714    UNASSIGNED);
715   
716  0 if (isObserved)
717    {
718  0 char resCharCode = ResidueProperties
719    .getSingleCharacterCode(ResidueProperties
720    .getCanonicalAminoAcid(residue.getDbResName()));
721  0 resNumMap.put(currSeqIndex, String.valueOf(resCharCode));
722   
723  0 int[] mappingcols = new int[] { Integer.valueOf(resNum),
724  0 UNASSIGNED, isObserved ? firstPDBResNum : UNASSIGNED };
725   
726  0 mapping.put(currSeqIndex - nonObservedShiftIndex, mappingcols);
727    }
728    }
729    }
730    }
731    }
732   
733    /**
734    * Get the leading integer part of a string that begins with an integer.
735    *
736    * @param input
737    * - the string input to process
738    * @param failValue
739    * - value returned if unsuccessful
740    * @return
741    */
 
742  0 toggle static int getLeadingIntegerValue(String input, int failValue)
743    {
744  0 if (input == null)
745    {
746  0 return failValue;
747    }
748  0 String[] parts = input.split("(?=\\D)(?<=\\d)");
749  0 if (parts != null && parts.length > 0 && parts[0].matches("[0-9]+"))
750    {
751  0 return Integer.valueOf(parts[0]);
752    }
753  0 return failValue;
754    }
755   
756    /**
757    *
758    * @param chainId
759    * Target chain to populate mapping of its atom positions.
760    * @param mapping
761    * Two dimension array of residue index versus atom position
762    * @throws IllegalArgumentException
763    * Thrown if chainId or mapping is null
764    * @throws SiftsException
765    */
 
766  0 toggle void populateAtomPositions(String chainId, Map<Integer, int[]> mapping)
767    throws IllegalArgumentException, SiftsException
768    {
769  0 try
770    {
771  0 PDBChain chain = pdb.findChain(chainId);
772   
773  0 if (chain == null || mapping == null)
774    {
775  0 throw new IllegalArgumentException(
776    "Chain id or mapping must not be null.");
777    }
778  0 for (int[] map : mapping.values())
779    {
780  0 if (map[PDB_RES_POS] != UNASSIGNED)
781    {
782  0 map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms);
783    }
784    }
785    } catch (NullPointerException e)
786    {
787  0 throw new SiftsException(e.getMessage());
788    } catch (Exception e)
789    {
790  0 throw new SiftsException(e.getMessage());
791    }
792    }
793   
794    /**
795    *
796    * @param residueIndex
797    * The residue index used for the search
798    * @param atoms
799    * A collection of Atom to search
800    * @return atom position for the given residue index
801    */
 
802  0 toggle int getAtomIndex(int residueIndex, Collection<Atom> atoms)
803    {
804  0 if (atoms == null)
805    {
806  0 throw new IllegalArgumentException(
807    "atoms collection must not be null!");
808    }
809  0 for (Atom atom : atoms)
810    {
811  0 if (atom.resNumber == residueIndex)
812    {
813  0 return atom.atomIndex;
814    }
815    }
816  0 return UNASSIGNED;
817    }
818   
819    /**
820    * Checks if the residue instance is marked 'Not_observed' or not
821    *
822    * @param residue
823    * @return
824    */
 
825  0 toggle private boolean isResidueObserved(Residue residue)
826    {
827  0 Set<String> annotations = getResidueAnnotaitons(residue,
828    ResidueDetailType.ANNOTATION);
829  0 if (annotations == null || annotations.isEmpty())
830    {
831  0 return true;
832    }
833  0 for (String annotation : annotations)
834    {
835  0 if (annotation.equalsIgnoreCase(NOT_OBSERVED))
836    {
837  0 return false;
838    }
839    }
840  0 return true;
841    }
842   
843    /**
844    * Get annotation String for a given residue and annotation type
845    *
846    * @param residue
847    * @param type
848    * @return
849    */
 
850  0 toggle private Set<String> getResidueAnnotaitons(Residue residue,
851    ResidueDetailType type)
852    {
853  0 HashSet<String> foundAnnotations = new HashSet<String>();
854  0 List<ResidueDetail> resDetails = residue.getResidueDetail();
855  0 for (ResidueDetail resDetail : resDetails)
856    {
857  0 if (resDetail.getProperty().equalsIgnoreCase(type.getCode()))
858    {
859  0 foundAnnotations.add(resDetail.getContent());
860    }
861    }
862  0 return foundAnnotations;
863    }
864   
 
865  0 toggle @Override
866    public boolean isAccessionMatched(String accession)
867    {
868  0 boolean isStrictMatch = true;
869  0 return isStrictMatch ? curSourceDBRef.equalsIgnoreCase(accession)
870    : curDBRefAccessionIdsString.contains(accession.toLowerCase());
871    }
872   
 
873  0 toggle private boolean isFoundInSiftsEntry(String accessionId)
874    {
875  0 Set<String> siftsDBRefs = getAllMappingAccession();
876  0 return accessionId != null
877    && siftsDBRefs.contains(accessionId.toLowerCase());
878    }
879   
880    /**
881    * Pad omitted residue positions in PDB sequence with gaps
882    *
883    * @param resNumMap
884    */
 
885  0 toggle void padWithGaps(Map<Integer, String> resNumMap,
886    List<Integer> omitNonObserved)
887    {
888  0 if (resNumMap == null || resNumMap.isEmpty())
889    {
890  0 return;
891    }
892  0 Integer[] keys = resNumMap.keySet().toArray(new Integer[0]);
893    // Arrays.sort(keys);
894  0 int firstIndex = keys[0];
895  0 int lastIndex = keys[keys.length - 1];
896    // System.out.println("Min value " + firstIndex);
897    // System.out.println("Max value " + lastIndex);
898  0 for (int x = firstIndex; x <= lastIndex; x++)
899    {
900  0 if (!resNumMap.containsKey(x) && !omitNonObserved.contains(x))
901    {
902  0 resNumMap.put(x, "-");
903    }
904    }
905    }
906   
 
907  12 toggle @Override
908    public Entity getEntityById(String id) throws SiftsException
909    {
910    // Determines an entity to process by performing a heuristic matching of all
911    // Entities with the given chainId and choosing the best matching Entity
912  12 Entity entity = getEntityByMostOptimalMatchedId(id);
913  12 if (entity != null)
914    {
915  12 return entity;
916    }
917  0 throw new SiftsException("Entity " + id + " not found");
918    }
919   
920    /**
921    * This method was added because EntityId is NOT always equal to ChainId.
922    * Hence, it provides the logic to greedily detect the "true" Entity for a
923    * given chainId where discrepancies exist.
924    *
925    * @param chainId
926    * @return
927    */
 
928  12 toggle public Entity getEntityByMostOptimalMatchedId(String chainId)
929    {
930    // System.out.println("---> advanced greedy entityId matching block
931    // entered..");
932  12 List<Entity> entities = siftsEntry.getEntity();
933  12 SiftsEntitySortPojo[] sPojo = new SiftsEntitySortPojo[entities.size()];
934  12 int count = 0;
935  12 for (Entity entity : entities)
936    {
937  21 sPojo[count] = new SiftsEntitySortPojo();
938  21 sPojo[count].entityId = entity.getEntityId();
939   
940  21 List<Segment> segments = entity.getSegment();
941  21 for (Segment segment : segments)
942    {
943  33 List<Residue> residues = segment.getListResidue().getResidue();
944  33 for (Residue residue : residues)
945    {
946  3582 List<CrossRefDb> cRefDbs = residue.getCrossRefDb();
947  3582 for (CrossRefDb cRefDb : cRefDbs)
948    {
949  41364 if (!cRefDb.getDbSource().equalsIgnoreCase("PDB"))
950    {
951  37782 continue;
952    }
953  3582 ++sPojo[count].resCount;
954  3582 if (cRefDb.getDbChainId().equalsIgnoreCase(chainId))
955    {
956  1881 ++sPojo[count].chainIdFreq;
957    }
958    }
959    }
960    }
961  21 sPojo[count].pid = (100 * sPojo[count].chainIdFreq)
962    / sPojo[count].resCount;
963  21 ++count;
964    }
965  12 Arrays.sort(sPojo, Collections.reverseOrder());
966    // System.out.println("highest matched entity : " + sPojo[0].entityId);
967    // System.out.println("highest matched pid : " + sPojo[0].pid);
968   
969  12 if (sPojo[0].entityId != null)
970    {
971  12 if (sPojo[0].pid < 1)
972    {
973  0 return null;
974    }
975  12 for (Entity entity : entities)
976    {
977  18 if (!entity.getEntityId().equalsIgnoreCase(sPojo[0].entityId))
978    {
979  6 continue;
980    }
981  12 return entity;
982    }
983    }
984  0 return null;
985    }
986   
 
987    private class SiftsEntitySortPojo
988    implements Comparable<SiftsEntitySortPojo>
989    {
990    public String entityId;
991   
992    public int chainIdFreq;
993   
994    public int pid;
995   
996    public int resCount;
997   
 
998  9 toggle @Override
999    public int compareTo(SiftsEntitySortPojo o)
1000    {
1001  9 return this.pid - o.pid;
1002    }
1003    }
1004   
 
1005    private class SegmentHelperPojo
1006    {
1007    private SequenceI seq;
1008   
1009    private HashMap<Integer, int[]> mapping;
1010   
1011    private TreeMap<Integer, String> resNumMap;
1012   
1013    private List<Integer> omitNonObserved;
1014   
1015    private int nonObservedShiftIndex;
1016   
1017    /**
1018    * count of number of 'not observed' positions in the PDB record's SEQRES
1019    * (total number of residues with coordinates == length(SEQRES) -
1020    * pdbeNonObserved
1021    */
1022    private int pdbeNonObserved;
1023   
 
1024  0 toggle public SegmentHelperPojo(SequenceI seq, HashMap<Integer, int[]> mapping,
1025    TreeMap<Integer, String> resNumMap,
1026    List<Integer> omitNonObserved, int nonObservedShiftIndex,
1027    int pdbeNonObserved)
1028    {
1029  0 setSeq(seq);
1030  0 setMapping(mapping);
1031  0 setResNumMap(resNumMap);
1032  0 setOmitNonObserved(omitNonObserved);
1033  0 setNonObservedShiftIndex(nonObservedShiftIndex);
1034  0 setPdbeNonObserved(pdbeNonObserved);
1035   
1036    }
1037   
 
1038  0 toggle public void setPdbeNonObserved(int pdbeNonObserved2)
1039    {
1040  0 this.pdbeNonObserved = pdbeNonObserved2;
1041    }
1042   
 
1043  0 toggle public int getPdbeNonObserved()
1044    {
1045  0 return pdbeNonObserved;
1046    }
 
1047  0 toggle public SequenceI getSeq()
1048    {
1049  0 return seq;
1050    }
1051   
 
1052  0 toggle public void setSeq(SequenceI seq)
1053    {
1054  0 this.seq = seq;
1055    }
1056   
 
1057  0 toggle public HashMap<Integer, int[]> getMapping()
1058    {
1059  0 return mapping;
1060    }
1061   
 
1062  0 toggle public void setMapping(HashMap<Integer, int[]> mapping)
1063    {
1064  0 this.mapping = mapping;
1065    }
1066   
 
1067  0 toggle public TreeMap<Integer, String> getResNumMap()
1068    {
1069  0 return resNumMap;
1070    }
1071   
 
1072  0 toggle public void setResNumMap(TreeMap<Integer, String> resNumMap)
1073    {
1074  0 this.resNumMap = resNumMap;
1075    }
1076   
 
1077  0 toggle public List<Integer> getOmitNonObserved()
1078    {
1079  0 return omitNonObserved;
1080    }
1081   
 
1082  0 toggle public void setOmitNonObserved(List<Integer> omitNonObserved)
1083    {
1084  0 this.omitNonObserved = omitNonObserved;
1085    }
1086   
 
1087  0 toggle public int getNonObservedShiftIndex()
1088    {
1089  0 return nonObservedShiftIndex;
1090    }
1091   
 
1092  0 toggle public void setNonObservedShiftIndex(int nonObservedShiftIndex)
1093    {
1094  0 this.nonObservedShiftIndex = nonObservedShiftIndex;
1095    }
1096   
1097    }
1098   
 
1099  0 toggle @Override
1100    public StringBuilder getMappingOutput(MappingOutputPojo mp)
1101    throws SiftsException
1102    {
1103  0 String seqRes = mp.getSeqResidue();
1104  0 String seqName = mp.getSeqName();
1105  0 int sStart = mp.getSeqStart();
1106  0 int sEnd = mp.getSeqEnd();
1107   
1108  0 String strRes = mp.getStrResidue();
1109  0 String strName = mp.getStrName();
1110  0 int pdbStart = mp.getStrStart();
1111  0 int pdbEnd = mp.getStrEnd();
1112   
1113  0 String type = mp.getType();
1114   
1115  0 int maxid = (seqName.length() >= strName.length()) ? seqName.length()
1116    : strName.length();
1117  0 int len = 72 - maxid - 1;
1118   
1119  0 int nochunks = ((seqRes.length()) / len)
1120  0 + ((seqRes.length()) % len > 0 ? 1 : 0);
1121    // output mappings
1122  0 StringBuilder output = new StringBuilder(512);
1123  0 output.append(NEWLINE);
1124  0 output.append("Sequence \u27f7 Structure mapping details")
1125    .append(NEWLINE);
1126  0 output.append("Method: SIFTS");
1127  0 output.append(NEWLINE).append(NEWLINE);
1128   
1129  0 output.append(new Format("%" + maxid + "s").form(seqName));
1130  0 output.append(" : ");
1131  0 output.append(String.valueOf(sStart));
1132  0 output.append(" - ");
1133  0 output.append(String.valueOf(sEnd));
1134  0 output.append(" Maps to ");
1135  0 output.append(NEWLINE);
1136  0 output.append(new Format("%" + maxid + "s").form(structId));
1137  0 output.append(" : ");
1138  0 output.append(String.valueOf(pdbStart));
1139  0 output.append(" - ");
1140  0 output.append(String.valueOf(pdbEnd));
1141  0 output.append(NEWLINE).append(NEWLINE);
1142   
1143  0 ScoreMatrix pam250 = ScoreModels.getInstance().getPam250();
1144  0 int matchedSeqCount = 0;
1145  0 for (int j = 0; j < nochunks; j++)
1146    {
1147    // Print the first aligned sequence
1148  0 output.append(new Format("%" + (maxid) + "s").form(seqName))
1149    .append(" ");
1150   
1151  0 for (int i = 0; i < len; i++)
1152    {
1153  0 if ((i + (j * len)) < seqRes.length())
1154    {
1155  0 output.append(seqRes.charAt(i + (j * len)));
1156    }
1157    }
1158   
1159  0 output.append(NEWLINE);
1160  0 output.append(new Format("%" + (maxid) + "s").form(" ")).append(" ");
1161   
1162    /*
1163    * Print out the match symbols:
1164    * | for exact match (ignoring case)
1165    * . if PAM250 score is positive
1166    * else a space
1167    */
1168  0 for (int i = 0; i < len; i++)
1169    {
1170  0 try
1171    {
1172  0 if ((i + (j * len)) < seqRes.length())
1173    {
1174  0 char c1 = seqRes.charAt(i + (j * len));
1175  0 char c2 = strRes.charAt(i + (j * len));
1176  0 boolean sameChar = Comparison.isSameResidue(c1, c2, false);
1177  0 if (sameChar && !Comparison.isGap(c1))
1178    {
1179  0 matchedSeqCount++;
1180  0 output.append("|");
1181    }
1182  0 else if (type.equals("pep"))
1183    {
1184  0 if (pam250.getPairwiseScore(c1, c2) > 0)
1185    {
1186  0 output.append(".");
1187    }
1188    else
1189    {
1190  0 output.append(" ");
1191    }
1192    }
1193    else
1194    {
1195  0 output.append(" ");
1196    }
1197    }
1198    } catch (IndexOutOfBoundsException e)
1199    {
1200  0 continue;
1201    }
1202    }
1203    // Now print the second aligned sequence
1204  0 output = output.append(NEWLINE);
1205  0 output = output.append(new Format("%" + (maxid) + "s").form(strName))
1206    .append(" ");
1207  0 for (int i = 0; i < len; i++)
1208    {
1209  0 if ((i + (j * len)) < strRes.length())
1210    {
1211  0 output.append(strRes.charAt(i + (j * len)));
1212    }
1213    }
1214  0 output.append(NEWLINE).append(NEWLINE);
1215    }
1216  0 float pid = (float) matchedSeqCount / seqRes.length() * 100;
1217  0 if (pid < SiftsSettings.getFailSafePIDThreshold())
1218    {
1219  0 throw new SiftsException(">>> Low PID detected for SIFTs mapping...");
1220    }
1221  0 output.append("Length of alignment = " + seqRes.length())
1222    .append(NEWLINE);
1223  0 output.append(new Format("Percentage ID = %2.2f").form(pid));
1224  0 return output;
1225    }
1226   
 
1227  0 toggle @Override
1228    public int getEntityCount()
1229    {
1230  0 return siftsEntry.getEntity().size();
1231    }
1232   
 
1233  0 toggle @Override
1234    public String getDbAccessionId()
1235    {
1236  0 return siftsEntry.getDbAccessionId();
1237    }
1238   
 
1239  0 toggle @Override
1240    public String getDbCoordSys()
1241    {
1242  0 return siftsEntry.getDbCoordSys();
1243    }
1244   
 
1245  0 toggle @Override
1246    public String getDbSource()
1247    {
1248  0 return siftsEntry.getDbSource();
1249    }
1250   
 
1251  0 toggle @Override
1252    public String getDbVersion()
1253    {
1254  0 return siftsEntry.getDbVersion();
1255    }
1256   
 
1257  0 toggle public static void setMockSiftsFile(File file)
1258    {
1259  0 mockSiftsFile = file;
1260    }
1261   
1262    }