Clover icon

Coverage Report

  1. Project Clover database Thu Aug 13 2020 12:04:21 BST
  2. Package jalview.ws.sifts

File SiftsClient.java

 

Coverage histogram

../../../img/srcFileCovDistChart1.png
52% of files have more coverage

Code metrics

138
408
49
5
1,279
947
145
0.36
8.33
9.8
2.96

Classes

Class Line # Actions
SiftsClient 78 385 127
0.059566796%
SiftsClient.CoordinateSys 124 2 2
0.550%
SiftsClient.ResidueDetailType 140 2 2
0.00%
SiftsClient.SiftsEntitySortPojo 1004 1 1
0.00%
SiftsClient.SegmentHelperPojo 1022 18 13
0.00%
 

Contributing tests

No tests hitting this source file were found.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.ws.sifts;
22   
23    import java.io.File;
24    import java.io.FileInputStream;
25    import java.io.FileOutputStream;
26    import java.io.IOException;
27    import java.io.InputStream;
28    import java.io.PrintStream;
29    import java.net.URL;
30    import java.net.URLConnection;
31    import java.nio.file.Files;
32    import java.nio.file.Path;
33    import java.nio.file.attribute.BasicFileAttributes;
34    import java.util.ArrayList;
35    import java.util.Arrays;
36    import java.util.Collection;
37    import java.util.Collections;
38    import java.util.Date;
39    import java.util.HashMap;
40    import java.util.HashSet;
41    import java.util.List;
42    import java.util.Map;
43    import java.util.Set;
44    import java.util.TreeMap;
45    import java.util.zip.GZIPInputStream;
46   
47    import javax.xml.bind.JAXBContext;
48    import javax.xml.bind.JAXBElement;
49    import javax.xml.bind.Unmarshaller;
50    import javax.xml.stream.XMLInputFactory;
51    import javax.xml.stream.XMLStreamReader;
52   
53    import jalview.analysis.AlignSeq;
54    import jalview.analysis.scoremodels.ScoreMatrix;
55    import jalview.analysis.scoremodels.ScoreModels;
56    import jalview.api.DBRefEntryI;
57    import jalview.api.SiftsClientI;
58    import jalview.datamodel.DBRefEntry;
59    import jalview.datamodel.DBRefSource;
60    import jalview.datamodel.SequenceI;
61    import jalview.io.StructureFile;
62    import jalview.schemes.ResidueProperties;
63    import jalview.structure.StructureMapping;
64    import jalview.util.Comparison;
65    import jalview.util.DBRefUtils;
66    import jalview.util.Format;
67    import jalview.util.Platform;
68    import jalview.xml.binding.sifts.Entry;
69    import jalview.xml.binding.sifts.Entry.Entity;
70    import jalview.xml.binding.sifts.Entry.Entity.Segment;
71    import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion;
72    import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue;
73    import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb;
74    import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail;
75    import mc_view.Atom;
76    import mc_view.PDBChain;
77   
 
78    public class SiftsClient implements SiftsClientI
79    {
80    /*
81    * for use in mocking out file fetch for tests only
82    * - reset to null after testing!
83    */
84    private static File mockSiftsFile;
85   
86    private Entry siftsEntry;
87   
88    private StructureFile pdb;
89   
90    private String pdbId;
91   
92    private String structId;
93   
94    private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT;
95   
96    /**
97    * PDB sequence position to sequence coordinate mapping as derived from SIFTS
98    * record for the identified SeqCoordSys Used for lift-over from sequence
99    * derived from PDB (with first extracted PDBRESNUM as 'start' to the sequence
100    * being annotated with PDB data
101    */
102    private jalview.datamodel.Mapping seqFromPdbMapping;
103   
104    private static final int BUFFER_SIZE = 4096;
105   
106    public static final int UNASSIGNED = Integer.MIN_VALUE;
107   
108    private static final int PDB_RES_POS = 0;
109   
110    private static final int PDB_ATOM_POS = 1;
111   
112    private static final int PDBE_POS = 2;
113   
114    private static final String NOT_OBSERVED = "Not_Observed";
115   
116    private static final String SIFTS_FTP_BASE_URL = "http://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/";
117   
118    private final static String NEWLINE = System.lineSeparator();
119   
120    private String curSourceDBRef;
121   
122    private HashSet<String> curDBRefAccessionIdsString;
123   
 
124    private enum CoordinateSys
125    {
126    UNIPROT("UniProt"), PDB("PDBresnum"), PDBe("PDBe");
127    private String name;
128   
 
129  3 toggle private CoordinateSys(String name)
130    {
131  3 this.name = name;
132    }
133   
 
134  0 toggle public String getName()
135    {
136  0 return name;
137    }
138    };
139   
 
140    private enum ResidueDetailType
141    {
142    NAME_SEC_STRUCTURE("nameSecondaryStructure"),
143    CODE_SEC_STRUCTURE("codeSecondaryStructure"), ANNOTATION("Annotation");
144    private String code;
145   
 
146  0 toggle private ResidueDetailType(String code)
147    {
148  0 this.code = code;
149    }
150   
 
151  0 toggle public String getCode()
152    {
153  0 return code;
154    }
155    };
156   
157    /**
158    * Fetch SIFTs file for the given PDBfile and construct an instance of
159    * SiftsClient
160    *
161    * @param pdbId
162    * @throws SiftsException
163    */
 
164  1 toggle public SiftsClient(StructureFile pdb) throws SiftsException
165    {
166  1 this.pdb = pdb;
167  1 this.pdbId = pdb.getId();
168  1 File siftsFile = getSiftsFile(pdbId);
169  1 siftsEntry = parseSIFTs(siftsFile);
170    }
171   
172    /**
173    * Parse the given SIFTs File and return a JAXB POJO of parsed data
174    *
175    * @param siftFile
176    * - the GZipped SIFTs XML file to parse
177    * @return
178    * @throws Exception
179    * if a problem occurs while parsing the SIFTs XML
180    */
 
181  1 toggle private Entry parseSIFTs(File siftFile) throws SiftsException
182    {
183  1 try (InputStream in = new FileInputStream(siftFile);
184  1 GZIPInputStream gzis = new GZIPInputStream(in);)
185    {
186    // System.out.println("File : " + siftFile.getAbsolutePath());
187  1 JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts");
188  1 XMLStreamReader streamReader = XMLInputFactory.newInstance()
189    .createXMLStreamReader(gzis);
190  1 Unmarshaller um = jc.createUnmarshaller();
191  1 JAXBElement<Entry> jbe = um.unmarshal(streamReader, Entry.class);
192  1 return jbe.getValue();
193    } catch (Exception e)
194    {
195  0 e.printStackTrace();
196  0 throw new SiftsException(e.getMessage());
197    }
198    }
199   
200    /**
201    * Get a SIFTs XML file for a given PDB Id from Cache or download from FTP
202    * repository if not found in cache
203    *
204    * @param pdbId
205    * @return SIFTs XML file
206    * @throws SiftsException
207    */
 
208  1 toggle public static File getSiftsFile(String pdbId) throws SiftsException
209    {
210    /*
211    * return mocked file if it has been set
212    */
213  1 if (mockSiftsFile != null)
214    {
215  0 return mockSiftsFile;
216    }
217   
218  1 String siftsFileName = SiftsSettings.getSiftDownloadDirectory()
219    + pdbId.toLowerCase() + ".xml.gz";
220  1 File siftsFile = new File(siftsFileName);
221  1 if (siftsFile.exists())
222    {
223    // The line below is required for unit testing... don't comment it out!!!
224  1 System.out.println(">>> SIFTS File already downloaded for " + pdbId);
225   
226  1 if (isFileOlderThanThreshold(siftsFile,
227    SiftsSettings.getCacheThresholdInDays()))
228    {
229  0 File oldSiftsFile = new File(siftsFileName + "_old");
230  0 siftsFile.renameTo(oldSiftsFile);
231  0 try
232    {
233  0 siftsFile = downloadSiftsFile(pdbId.toLowerCase());
234  0 oldSiftsFile.delete();
235  0 return siftsFile;
236    } catch (IOException e)
237    {
238  0 e.printStackTrace();
239  0 oldSiftsFile.renameTo(siftsFile);
240  0 return new File(siftsFileName);
241    }
242    }
243    else
244    {
245  1 return siftsFile;
246    }
247    }
248  0 try
249    {
250  0 siftsFile = downloadSiftsFile(pdbId.toLowerCase());
251    } catch (IOException e)
252    {
253  0 throw new SiftsException(e.getMessage());
254    }
255  0 return siftsFile;
256    }
257   
258    /**
259    * This method enables checking if a cached file has exceeded a certain
260    * threshold(in days)
261    *
262    * @param file
263    * the cached file
264    * @param noOfDays
265    * the threshold in days
266    * @return
267    */
 
268  1 toggle public static boolean isFileOlderThanThreshold(File file, int noOfDays)
269    {
270  1 Path filePath = file.toPath();
271  1 BasicFileAttributes attr;
272  1 int diffInDays = 0;
273  1 try
274    {
275  1 attr = Files.readAttributes(filePath, BasicFileAttributes.class);
276  1 diffInDays = (int) ((new Date().getTime()
277    - attr.lastModifiedTime().toMillis())
278    / (1000 * 60 * 60 * 24));
279    // System.out.println("Diff in days : " + diffInDays);
280    } catch (IOException e)
281    {
282  0 e.printStackTrace();
283    }
284  1 return noOfDays <= diffInDays;
285    }
286   
287    /**
288    * Download a SIFTs XML file for a given PDB Id from an FTP repository
289    *
290    * @param pdbId
291    * @return downloaded SIFTs XML file
292    * @throws SiftsException
293    * @throws IOException
294    */
 
295  0 toggle public static File downloadSiftsFile(String pdbId)
296    throws SiftsException, IOException
297    {
298  0 if (pdbId.contains(".cif"))
299    {
300  0 pdbId = pdbId.replace(".cif", "");
301    }
302  0 String siftFile = pdbId + ".xml.gz";
303  0 String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile;
304   
305    /*
306    * Download the file from URL to either
307    * Java: directory of cached downloaded SIFTS files
308    * Javascript: temporary 'file' (in-memory cache)
309    */
310  0 File downloadTo = null;
311  0 if (Platform.isJS())
312    {
313  0 downloadTo = File.createTempFile(siftFile, ".xml.gz");
314    }
315    else
316    {
317  0 downloadTo = new File(
318    SiftsSettings.getSiftDownloadDirectory() + siftFile);
319  0 File siftsDownloadDir = new File(
320    SiftsSettings.getSiftDownloadDirectory());
321  0 if (!siftsDownloadDir.exists())
322    {
323  0 siftsDownloadDir.mkdirs();
324    }
325    }
326   
327    // System.out.println(">> Download ftp url : " + siftsFileFTPURL);
328    // long now = System.currentTimeMillis();
329  0 URL url = new URL(siftsFileFTPURL);
330  0 URLConnection conn = url.openConnection();
331  0 InputStream inputStream = conn.getInputStream();
332  0 FileOutputStream outputStream = new FileOutputStream(
333    downloadTo);
334  0 byte[] buffer = new byte[BUFFER_SIZE];
335  0 int bytesRead = -1;
336  0 while ((bytesRead = inputStream.read(buffer)) != -1)
337    {
338  0 outputStream.write(buffer, 0, bytesRead);
339    }
340  0 outputStream.close();
341  0 inputStream.close();
342    // System.out.println(">>> File downloaded : " + downloadedSiftsFile
343    // + " took " + (System.currentTimeMillis() - now) + "ms");
344  0 return downloadTo;
345    }
346   
347    /**
348    * Delete the SIFTs file for the given PDB Id in the local SIFTs download
349    * directory
350    *
351    * @param pdbId
352    * @return true if the file was deleted or doesn't exist
353    */
 
354  0 toggle public static boolean deleteSiftsFileByPDBId(String pdbId)
355    {
356  0 File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory()
357    + pdbId.toLowerCase() + ".xml.gz");
358  0 if (siftsFile.exists())
359    {
360  0 return siftsFile.delete();
361    }
362  0 return true;
363    }
364   
365    /**
366    * Get a valid SIFTs DBRef for the given sequence current SIFTs entry
367    *
368    * @param seq
369    * - the target sequence for the operation
370    * @return a valid DBRefEntry that is SIFTs compatible
371    * @throws Exception
372    * if no valid source DBRefEntry was found for the given sequences
373    */
 
374  0 toggle public DBRefEntryI getValidSourceDBRef(SequenceI seq)
375    throws SiftsException
376    {
377  0 List<DBRefEntry> dbRefs = seq.getPrimaryDBRefs();
378  0 if (dbRefs == null || dbRefs.size() < 1)
379    {
380  0 throw new SiftsException(
381    "Source DBRef could not be determined. DBRefs might not have been retrieved.");
382    }
383   
384  0 for (DBRefEntry dbRef : dbRefs)
385    {
386  0 if (dbRef == null || dbRef.getAccessionId() == null
387    || dbRef.getSource() == null)
388    {
389  0 continue;
390    }
391  0 String canonicalSource = DBRefUtils
392    .getCanonicalName(dbRef.getSource());
393  0 if (isValidDBRefEntry(dbRef)
394    && (canonicalSource.equalsIgnoreCase(DBRefSource.UNIPROT)
395    || canonicalSource.equalsIgnoreCase(DBRefSource.PDB)))
396    {
397  0 return dbRef;
398    }
399    }
400  0 throw new SiftsException("Could not get source DB Ref");
401    }
402   
403    /**
404    * Check that the DBRef Entry is properly populated and is available in this
405    * SiftClient instance
406    *
407    * @param entry
408    * - DBRefEntry to validate
409    * @return true validation is successful otherwise false is returned.
410    */
 
411  0 toggle boolean isValidDBRefEntry(DBRefEntryI entry)
412    {
413  0 return entry != null && entry.getAccessionId() != null
414    && isFoundInSiftsEntry(entry.getAccessionId());
415    }
416   
 
417  0 toggle @Override
418    public HashSet<String> getAllMappingAccession()
419    {
420  0 HashSet<String> accessions = new HashSet<String>();
421  0 List<Entity> entities = siftsEntry.getEntity();
422  0 for (Entity entity : entities)
423    {
424  0 List<Segment> segments = entity.getSegment();
425  0 for (Segment segment : segments)
426    {
427  0 List<MapRegion> mapRegions = segment.getListMapRegion()
428    .getMapRegion();
429  0 for (MapRegion mapRegion : mapRegions)
430    {
431  0 accessions
432    .add(mapRegion.getDb().getDbAccessionId().toLowerCase());
433    }
434    }
435    }
436  0 return accessions;
437    }
438   
 
439  0 toggle @Override
440    public StructureMapping getSiftsStructureMapping(SequenceI seq,
441    String pdbFile, String chain) throws SiftsException
442    {
443  0 SequenceI aseq = seq;
444  0 while (seq.getDatasetSequence() != null)
445    {
446  0 seq = seq.getDatasetSequence();
447    }
448  0 structId = (chain == null) ? pdbId : pdbId + "|" + chain;
449  0 System.out.println("Getting SIFTS mapping for " + structId + ": seq "
450    + seq.getName());
451   
452  0 final StringBuilder mappingDetails = new StringBuilder(128);
453  0 PrintStream ps = new PrintStream(System.out)
454    {
 
455  0 toggle @Override
456    public void print(String x)
457    {
458  0 mappingDetails.append(x);
459    }
460   
 
461  0 toggle @Override
462    public void println()
463    {
464  0 mappingDetails.append(NEWLINE);
465    }
466    };
467  0 HashMap<Integer, int[]> mapping = getGreedyMapping(chain, seq, ps);
468   
469  0 String mappingOutput = mappingDetails.toString();
470  0 StructureMapping siftsMapping = new StructureMapping(aseq, pdbFile,
471    pdbId, chain, mapping, mappingOutput, seqFromPdbMapping);
472   
473  0 return siftsMapping;
474    }
475   
 
476  0 toggle @Override
477    public HashMap<Integer, int[]> getGreedyMapping(String entityId,
478    SequenceI seq, java.io.PrintStream os) throws SiftsException
479    {
480  0 List<Integer> omitNonObserved = new ArrayList<>();
481  0 int nonObservedShiftIndex = 0,pdbeNonObserved=0;
482    // System.out.println("Generating mappings for : " + entityId);
483  0 Entity entity = null;
484  0 entity = getEntityById(entityId);
485  0 String originalSeq = AlignSeq.extractGaps(
486    jalview.util.Comparison.GapChars, seq.getSequenceAsString());
487  0 HashMap<Integer, int[]> mapping = new HashMap<Integer, int[]>();
488  0 DBRefEntryI sourceDBRef;
489  0 sourceDBRef = getValidSourceDBRef(seq);
490    // TODO ensure sequence start/end is in the same coordinate system and
491    // consistent with the choosen sourceDBRef
492   
493    // set sequence coordinate system - default value is UniProt
494  0 if (sourceDBRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
495    {
496  0 seqCoordSys = CoordinateSys.PDB;
497    }
498   
499  0 HashSet<String> dbRefAccessionIdsString = new HashSet<String>();
500  0 for (DBRefEntry dbref : seq.getDBRefs())
501    {
502  0 dbRefAccessionIdsString.add(dbref.getAccessionId().toLowerCase());
503    }
504  0 dbRefAccessionIdsString.add(sourceDBRef.getAccessionId().toLowerCase());
505   
506  0 curDBRefAccessionIdsString = dbRefAccessionIdsString;
507  0 curSourceDBRef = sourceDBRef.getAccessionId();
508   
509  0 TreeMap<Integer, String> resNumMap = new TreeMap<Integer, String>();
510  0 List<Segment> segments = entity.getSegment();
511  0 SegmentHelperPojo shp = new SegmentHelperPojo(seq, mapping, resNumMap,
512    omitNonObserved, nonObservedShiftIndex,pdbeNonObserved);
513  0 processSegments(segments, shp);
514  0 try
515    {
516  0 populateAtomPositions(entityId, mapping);
517    } catch (Exception e)
518    {
519  0 e.printStackTrace();
520    }
521  0 if (seqCoordSys == CoordinateSys.UNIPROT)
522    {
523  0 padWithGaps(resNumMap, omitNonObserved);
524    }
525  0 int seqStart = UNASSIGNED;
526  0 int seqEnd = UNASSIGNED;
527  0 int pdbStart = UNASSIGNED;
528  0 int pdbEnd = UNASSIGNED;
529   
530  0 if (mapping.isEmpty())
531    {
532  0 throw new SiftsException("SIFTS mapping failed");
533    }
534    // also construct a mapping object between the seq-coord sys and the PDB seq's coord sys
535   
536  0 Integer[] keys = mapping.keySet().toArray(new Integer[0]);
537  0 Arrays.sort(keys);
538  0 seqStart = keys[0];
539  0 seqEnd = keys[keys.length - 1];
540  0 List<int[]> from=new ArrayList<>(),to=new ArrayList<>();
541  0 int[]_cfrom=null,_cto=null;
542  0 String matchedSeq = originalSeq;
543  0 if (seqStart != UNASSIGNED) // fixme! seqStart can map to -1 for a pdb sequence that starts <-1
544    {
545  0 for (int seqps:keys)
546    {
547  0 int pdbpos = mapping.get(seqps)[PDBE_POS];
548  0 if (pdbpos == UNASSIGNED)
549    {
550    // not correct - pdbpos might be -1, but leave it for now
551  0 continue;
552    }
553  0 if (_cfrom==null || seqps!=_cfrom[1]+1)
554    {
555  0 _cfrom = new int[] { seqps,seqps};
556  0 from.add(_cfrom);
557  0 _cto = null; // discontinuity
558    } else {
559  0 _cfrom[1]= seqps;
560    }
561  0 if (_cto==null || pdbpos!=1+_cto[1])
562    {
563  0 _cto = new int[] { pdbpos,pdbpos};
564  0 to.add(_cto);
565    } else {
566  0 _cto[1] = pdbpos;
567    }
568    }
569  0 _cfrom = new int[from.size() * 2];
570  0 _cto = new int[to.size() * 2];
571  0 int p = 0;
572  0 for (int[] range : from)
573    {
574  0 _cfrom[p++] = range[0];
575  0 _cfrom[p++] = range[1];
576    }
577  0 ;
578  0 p = 0;
579  0 for (int[] range : to)
580    {
581  0 _cto[p++] = range[0];
582  0 _cto[p++] = range[1];
583    }
584  0 ;
585   
586  0 seqFromPdbMapping = new jalview.datamodel.Mapping(null, _cto, _cfrom,
587    1,
588    1);
589  0 pdbStart = mapping.get(seqStart)[PDB_RES_POS];
590  0 pdbEnd = mapping.get(seqEnd)[PDB_RES_POS];
591  0 int orignalSeqStart = seq.getStart();
592  0 if (orignalSeqStart >= 1)
593    {
594  0 int subSeqStart = (seqStart >= orignalSeqStart)
595    ? seqStart - orignalSeqStart
596    : 0;
597  0 int subSeqEnd = seqEnd - (orignalSeqStart - 1);
598  0 subSeqEnd = originalSeq.length() < subSeqEnd ? originalSeq.length()
599    : subSeqEnd;
600  0 matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd);
601    }
602    else
603    {
604  0 matchedSeq = originalSeq.substring(1, originalSeq.length());
605    }
606    }
607   
608  0 StringBuilder targetStrucSeqs = new StringBuilder();
609  0 for (String res : resNumMap.values())
610    {
611  0 targetStrucSeqs.append(res);
612    }
613   
614  0 if (os != null)
615    {
616  0 MappingOutputPojo mop = new MappingOutputPojo();
617  0 mop.setSeqStart(seqStart);
618  0 mop.setSeqEnd(seqEnd);
619  0 mop.setSeqName(seq.getName());
620  0 mop.setSeqResidue(matchedSeq);
621   
622  0 mop.setStrStart(pdbStart);
623  0 mop.setStrEnd(pdbEnd);
624  0 mop.setStrName(structId);
625  0 mop.setStrResidue(targetStrucSeqs.toString());
626   
627  0 mop.setType("pep");
628  0 os.print(getMappingOutput(mop).toString());
629  0 os.println();
630    }
631  0 return mapping;
632    }
633   
 
634  0 toggle void processSegments(List<Segment> segments, SegmentHelperPojo shp)
635    {
636  0 SequenceI seq = shp.getSeq();
637  0 HashMap<Integer, int[]> mapping = shp.getMapping();
638  0 TreeMap<Integer, String> resNumMap = shp.getResNumMap();
639  0 List<Integer> omitNonObserved = shp.getOmitNonObserved();
640  0 int nonObservedShiftIndex = shp.getNonObservedShiftIndex();
641  0 int pdbeNonObservedCount = shp.getPdbeNonObserved();
642  0 int firstPDBResNum = UNASSIGNED;
643  0 for (Segment segment : segments)
644    {
645    // System.out.println("Mapping segments : " + segment.getSegId() + "\\"s
646    // + segStartEnd);
647  0 List<Residue> residues = segment.getListResidue().getResidue();
648  0 for (Residue residue : residues)
649    {
650  0 boolean isObserved = isResidueObserved(residue);
651  0 int pdbeIndex = getLeadingIntegerValue(residue.getDbResNum(),
652    UNASSIGNED);
653  0 int currSeqIndex = UNASSIGNED;
654  0 List<CrossRefDb> cRefDbs = residue.getCrossRefDb();
655  0 CrossRefDb pdbRefDb = null;
656  0 for (CrossRefDb cRefDb : cRefDbs)
657    {
658  0 if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB))
659    {
660  0 pdbRefDb = cRefDb;
661  0 if (firstPDBResNum == UNASSIGNED)
662    {
663  0 firstPDBResNum = getLeadingIntegerValue(cRefDb.getDbResNum(),
664    UNASSIGNED);
665    }
666    else
667    {
668  0 if (isObserved)
669    {
670    // after we find the first observed residue we just increment
671  0 firstPDBResNum++;
672    }
673    }
674    }
675  0 if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys.getName())
676    && isAccessionMatched(cRefDb.getDbAccessionId()))
677    {
678  0 currSeqIndex = getLeadingIntegerValue(cRefDb.getDbResNum(),
679    UNASSIGNED);
680  0 if (pdbRefDb != null)
681    {
682  0 break;// exit loop if pdb and uniprot are already found
683    }
684    }
685    }
686  0 if (!isObserved)
687    {
688  0 ++pdbeNonObservedCount;
689    }
690  0 if (seqCoordSys == seqCoordSys.PDB) // FIXME: is seqCoordSys ever PDBe
691    // ???
692    {
693    // if the sequence has a primary reference to the PDB, then we are
694    // dealing with a sequence extracted directly from the PDB. In that
695    // case, numbering is PDBe - non-observed residues
696  0 currSeqIndex = seq.getStart() - 1 + pdbeIndex;
697    }
698  0 if (!isObserved)
699    {
700  0 if (seqCoordSys != CoordinateSys.UNIPROT) // FIXME: PDB or PDBe only
701    // here
702    {
703    // mapping to PDB or PDBe so we need to bookkeep for the
704    // non-observed
705    // SEQRES positions
706  0 omitNonObserved.add(currSeqIndex);
707  0 ++nonObservedShiftIndex;
708    }
709    }
710  0 if (currSeqIndex == UNASSIGNED)
711    {
712    // change in logic - unobserved residues with no currSeqIndex
713    // corresponding are still counted in both nonObservedShiftIndex and
714    // pdbeIndex...
715  0 continue;
716    }
717    // if (currSeqIndex >= seq.getStart() && currSeqIndex <= seqlength) //
718    // true
719    // numbering
720    // is
721    // not
722    // up
723    // to
724    // seq.getEnd()
725    {
726   
727  0 int resNum = (pdbRefDb == null)
728    ? getLeadingIntegerValue(residue.getDbResNum(),
729    UNASSIGNED)
730    : getLeadingIntegerValue(pdbRefDb.getDbResNum(),
731    UNASSIGNED);
732   
733  0 if (isObserved)
734    {
735  0 char resCharCode = ResidueProperties
736    .getSingleCharacterCode(ResidueProperties
737    .getCanonicalAminoAcid(residue.getDbResName()));
738  0 resNumMap.put(currSeqIndex, String.valueOf(resCharCode));
739   
740  0 int[] mappingcols = new int[] { Integer.valueOf(resNum),
741  0 UNASSIGNED, isObserved ? firstPDBResNum : UNASSIGNED };
742   
743  0 mapping.put(currSeqIndex - nonObservedShiftIndex, mappingcols);
744    }
745    }
746    }
747    }
748    }
749   
750    /**
751    * Get the leading integer part of a string that begins with an integer.
752    *
753    * @param input
754    * - the string input to process
755    * @param failValue
756    * - value returned if unsuccessful
757    * @return
758    */
 
759  0 toggle static int getLeadingIntegerValue(String input, int failValue)
760    {
761  0 if (input == null)
762    {
763  0 return failValue;
764    }
765  0 String[] parts = input.split("(?=\\D)(?<=\\d)");
766  0 if (parts != null && parts.length > 0 && parts[0].matches("[0-9]+"))
767    {
768  0 return Integer.valueOf(parts[0]);
769    }
770  0 return failValue;
771    }
772   
773    /**
774    *
775    * @param chainId
776    * Target chain to populate mapping of its atom positions.
777    * @param mapping
778    * Two dimension array of residue index versus atom position
779    * @throws IllegalArgumentException
780    * Thrown if chainId or mapping is null
781    * @throws SiftsException
782    */
 
783  0 toggle void populateAtomPositions(String chainId, Map<Integer, int[]> mapping)
784    throws IllegalArgumentException, SiftsException
785    {
786  0 try
787    {
788  0 PDBChain chain = pdb.findChain(chainId);
789   
790  0 if (chain == null || mapping == null)
791    {
792  0 throw new IllegalArgumentException(
793    "Chain id or mapping must not be null.");
794    }
795  0 for (int[] map : mapping.values())
796    {
797  0 if (map[PDB_RES_POS] != UNASSIGNED)
798    {
799  0 map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms);
800    }
801    }
802    } catch (NullPointerException e)
803    {
804  0 throw new SiftsException(e.getMessage());
805    } catch (Exception e)
806    {
807  0 throw new SiftsException(e.getMessage());
808    }
809    }
810   
811    /**
812    *
813    * @param residueIndex
814    * The residue index used for the search
815    * @param atoms
816    * A collection of Atom to search
817    * @return atom position for the given residue index
818    */
 
819  0 toggle int getAtomIndex(int residueIndex, Collection<Atom> atoms)
820    {
821  0 if (atoms == null)
822    {
823  0 throw new IllegalArgumentException(
824    "atoms collection must not be null!");
825    }
826  0 for (Atom atom : atoms)
827    {
828  0 if (atom.resNumber == residueIndex)
829    {
830  0 return atom.atomIndex;
831    }
832    }
833  0 return UNASSIGNED;
834    }
835   
836    /**
837    * Checks if the residue instance is marked 'Not_observed' or not
838    *
839    * @param residue
840    * @return
841    */
 
842  0 toggle private boolean isResidueObserved(Residue residue)
843    {
844  0 Set<String> annotations = getResidueAnnotaitons(residue,
845    ResidueDetailType.ANNOTATION);
846  0 if (annotations == null || annotations.isEmpty())
847    {
848  0 return true;
849    }
850  0 for (String annotation : annotations)
851    {
852  0 if (annotation.equalsIgnoreCase(NOT_OBSERVED))
853    {
854  0 return false;
855    }
856    }
857  0 return true;
858    }
859   
860    /**
861    * Get annotation String for a given residue and annotation type
862    *
863    * @param residue
864    * @param type
865    * @return
866    */
 
867  0 toggle private Set<String> getResidueAnnotaitons(Residue residue,
868    ResidueDetailType type)
869    {
870  0 HashSet<String> foundAnnotations = new HashSet<String>();
871  0 List<ResidueDetail> resDetails = residue.getResidueDetail();
872  0 for (ResidueDetail resDetail : resDetails)
873    {
874  0 if (resDetail.getProperty().equalsIgnoreCase(type.getCode()))
875    {
876  0 foundAnnotations.add(resDetail.getContent());
877    }
878    }
879  0 return foundAnnotations;
880    }
881   
 
882  0 toggle @Override
883    public boolean isAccessionMatched(String accession)
884    {
885  0 boolean isStrictMatch = true;
886  0 return isStrictMatch ? curSourceDBRef.equalsIgnoreCase(accession)
887    : curDBRefAccessionIdsString.contains(accession.toLowerCase());
888    }
889   
 
890  0 toggle private boolean isFoundInSiftsEntry(String accessionId)
891    {
892  0 Set<String> siftsDBRefs = getAllMappingAccession();
893  0 return accessionId != null
894    && siftsDBRefs.contains(accessionId.toLowerCase());
895    }
896   
897    /**
898    * Pad omitted residue positions in PDB sequence with gaps
899    *
900    * @param resNumMap
901    */
 
902  0 toggle void padWithGaps(Map<Integer, String> resNumMap,
903    List<Integer> omitNonObserved)
904    {
905  0 if (resNumMap == null || resNumMap.isEmpty())
906    {
907  0 return;
908    }
909  0 Integer[] keys = resNumMap.keySet().toArray(new Integer[0]);
910    // Arrays.sort(keys);
911  0 int firstIndex = keys[0];
912  0 int lastIndex = keys[keys.length - 1];
913    // System.out.println("Min value " + firstIndex);
914    // System.out.println("Max value " + lastIndex);
915  0 for (int x = firstIndex; x <= lastIndex; x++)
916    {
917  0 if (!resNumMap.containsKey(x) && !omitNonObserved.contains(x))
918    {
919  0 resNumMap.put(x, "-");
920    }
921    }
922    }
923   
 
924  0 toggle @Override
925    public Entity getEntityById(String id) throws SiftsException
926    {
927    // Determines an entity to process by performing a heuristic matching of all
928    // Entities with the given chainId and choosing the best matching Entity
929  0 Entity entity = getEntityByMostOptimalMatchedId(id);
930  0 if (entity != null)
931    {
932  0 return entity;
933    }
934  0 throw new SiftsException("Entity " + id + " not found");
935    }
936   
937    /**
938    * This method was added because EntityId is NOT always equal to ChainId.
939    * Hence, it provides the logic to greedily detect the "true" Entity for a
940    * given chainId where discrepancies exist.
941    *
942    * @param chainId
943    * @return
944    */
 
945  0 toggle public Entity getEntityByMostOptimalMatchedId(String chainId)
946    {
947    // System.out.println("---> advanced greedy entityId matching block
948    // entered..");
949  0 List<Entity> entities = siftsEntry.getEntity();
950  0 SiftsEntitySortPojo[] sPojo = new SiftsEntitySortPojo[entities.size()];
951  0 int count = 0;
952  0 for (Entity entity : entities)
953    {
954  0 sPojo[count] = new SiftsEntitySortPojo();
955  0 sPojo[count].entityId = entity.getEntityId();
956   
957  0 List<Segment> segments = entity.getSegment();
958  0 for (Segment segment : segments)
959    {
960  0 List<Residue> residues = segment.getListResidue().getResidue();
961  0 for (Residue residue : residues)
962    {
963  0 List<CrossRefDb> cRefDbs = residue.getCrossRefDb();
964  0 for (CrossRefDb cRefDb : cRefDbs)
965    {
966  0 if (!cRefDb.getDbSource().equalsIgnoreCase("PDB"))
967    {
968  0 continue;
969    }
970  0 ++sPojo[count].resCount;
971  0 if (cRefDb.getDbChainId().equalsIgnoreCase(chainId))
972    {
973  0 ++sPojo[count].chainIdFreq;
974    }
975    }
976    }
977    }
978  0 sPojo[count].pid = (100 * sPojo[count].chainIdFreq)
979    / sPojo[count].resCount;
980  0 ++count;
981    }
982  0 Arrays.sort(sPojo, Collections.reverseOrder());
983    // System.out.println("highest matched entity : " + sPojo[0].entityId);
984    // System.out.println("highest matched pid : " + sPojo[0].pid);
985   
986  0 if (sPojo[0].entityId != null)
987    {
988  0 if (sPojo[0].pid < 1)
989    {
990  0 return null;
991    }
992  0 for (Entity entity : entities)
993    {
994  0 if (!entity.getEntityId().equalsIgnoreCase(sPojo[0].entityId))
995    {
996  0 continue;
997    }
998  0 return entity;
999    }
1000    }
1001  0 return null;
1002    }
1003   
 
1004    private class SiftsEntitySortPojo
1005    implements Comparable<SiftsEntitySortPojo>
1006    {
1007    public String entityId;
1008   
1009    public int chainIdFreq;
1010   
1011    public int pid;
1012   
1013    public int resCount;
1014   
 
1015  0 toggle @Override
1016    public int compareTo(SiftsEntitySortPojo o)
1017    {
1018  0 return this.pid - o.pid;
1019    }
1020    }
1021   
 
1022    private class SegmentHelperPojo
1023    {
1024    private SequenceI seq;
1025   
1026    private HashMap<Integer, int[]> mapping;
1027   
1028    private TreeMap<Integer, String> resNumMap;
1029   
1030    private List<Integer> omitNonObserved;
1031   
1032    private int nonObservedShiftIndex;
1033   
1034    /**
1035    * count of number of 'not observed' positions in the PDB record's SEQRES
1036    * (total number of residues with coordinates == length(SEQRES) -
1037    * pdbeNonObserved
1038    */
1039    private int pdbeNonObserved;
1040   
 
1041  0 toggle public SegmentHelperPojo(SequenceI seq, HashMap<Integer, int[]> mapping,
1042    TreeMap<Integer, String> resNumMap,
1043    List<Integer> omitNonObserved, int nonObservedShiftIndex,
1044    int pdbeNonObserved)
1045    {
1046  0 setSeq(seq);
1047  0 setMapping(mapping);
1048  0 setResNumMap(resNumMap);
1049  0 setOmitNonObserved(omitNonObserved);
1050  0 setNonObservedShiftIndex(nonObservedShiftIndex);
1051  0 setPdbeNonObserved(pdbeNonObserved);
1052   
1053    }
1054   
 
1055  0 toggle public void setPdbeNonObserved(int pdbeNonObserved2)
1056    {
1057  0 this.pdbeNonObserved = pdbeNonObserved2;
1058    }
1059   
 
1060  0 toggle public int getPdbeNonObserved()
1061    {
1062  0 return pdbeNonObserved;
1063    }
 
1064  0 toggle public SequenceI getSeq()
1065    {
1066  0 return seq;
1067    }
1068   
 
1069  0 toggle public void setSeq(SequenceI seq)
1070    {
1071  0 this.seq = seq;
1072    }
1073   
 
1074  0 toggle public HashMap<Integer, int[]> getMapping()
1075    {
1076  0 return mapping;
1077    }
1078   
 
1079  0 toggle public void setMapping(HashMap<Integer, int[]> mapping)
1080    {
1081  0 this.mapping = mapping;
1082    }
1083   
 
1084  0 toggle public TreeMap<Integer, String> getResNumMap()
1085    {
1086  0 return resNumMap;
1087    }
1088   
 
1089  0 toggle public void setResNumMap(TreeMap<Integer, String> resNumMap)
1090    {
1091  0 this.resNumMap = resNumMap;
1092    }
1093   
 
1094  0 toggle public List<Integer> getOmitNonObserved()
1095    {
1096  0 return omitNonObserved;
1097    }
1098   
 
1099  0 toggle public void setOmitNonObserved(List<Integer> omitNonObserved)
1100    {
1101  0 this.omitNonObserved = omitNonObserved;
1102    }
1103   
 
1104  0 toggle public int getNonObservedShiftIndex()
1105    {
1106  0 return nonObservedShiftIndex;
1107    }
1108   
 
1109  0 toggle public void setNonObservedShiftIndex(int nonObservedShiftIndex)
1110    {
1111  0 this.nonObservedShiftIndex = nonObservedShiftIndex;
1112    }
1113   
1114    }
1115   
 
1116  0 toggle @Override
1117    public StringBuilder getMappingOutput(MappingOutputPojo mp)
1118    throws SiftsException
1119    {
1120  0 String seqRes = mp.getSeqResidue();
1121  0 String seqName = mp.getSeqName();
1122  0 int sStart = mp.getSeqStart();
1123  0 int sEnd = mp.getSeqEnd();
1124   
1125  0 String strRes = mp.getStrResidue();
1126  0 String strName = mp.getStrName();
1127  0 int pdbStart = mp.getStrStart();
1128  0 int pdbEnd = mp.getStrEnd();
1129   
1130  0 String type = mp.getType();
1131   
1132  0 int maxid = (seqName.length() >= strName.length()) ? seqName.length()
1133    : strName.length();
1134  0 int len = 72 - maxid - 1;
1135   
1136  0 int nochunks = ((seqRes.length()) / len)
1137  0 + ((seqRes.length()) % len > 0 ? 1 : 0);
1138    // output mappings
1139  0 StringBuilder output = new StringBuilder(512);
1140  0 output.append(NEWLINE);
1141  0 output.append("Sequence \u27f7 Structure mapping details")
1142    .append(NEWLINE);
1143  0 output.append("Method: SIFTS");
1144  0 output.append(NEWLINE).append(NEWLINE);
1145   
1146  0 output.append(new Format("%" + maxid + "s").form(seqName));
1147  0 output.append(" : ");
1148  0 output.append(String.valueOf(sStart));
1149  0 output.append(" - ");
1150  0 output.append(String.valueOf(sEnd));
1151  0 output.append(" Maps to ");
1152  0 output.append(NEWLINE);
1153  0 output.append(new Format("%" + maxid + "s").form(structId));
1154  0 output.append(" : ");
1155  0 output.append(String.valueOf(pdbStart));
1156  0 output.append(" - ");
1157  0 output.append(String.valueOf(pdbEnd));
1158  0 output.append(NEWLINE).append(NEWLINE);
1159   
1160  0 ScoreMatrix pam250 = ScoreModels.getInstance().getPam250();
1161  0 int matchedSeqCount = 0;
1162  0 for (int j = 0; j < nochunks; j++)
1163    {
1164    // Print the first aligned sequence
1165  0 output.append(new Format("%" + (maxid) + "s").form(seqName))
1166    .append(" ");
1167   
1168  0 for (int i = 0; i < len; i++)
1169    {
1170  0 if ((i + (j * len)) < seqRes.length())
1171    {
1172  0 output.append(seqRes.charAt(i + (j * len)));
1173    }
1174    }
1175   
1176  0 output.append(NEWLINE);
1177  0 output.append(new Format("%" + (maxid) + "s").form(" ")).append(" ");
1178   
1179    /*
1180    * Print out the match symbols:
1181    * | for exact match (ignoring case)
1182    * . if PAM250 score is positive
1183    * else a space
1184    */
1185  0 for (int i = 0; i < len; i++)
1186    {
1187  0 try
1188    {
1189  0 if ((i + (j * len)) < seqRes.length())
1190    {
1191  0 char c1 = seqRes.charAt(i + (j * len));
1192  0 char c2 = strRes.charAt(i + (j * len));
1193  0 boolean sameChar = Comparison.isSameResidue(c1, c2, false);
1194  0 if (sameChar && !Comparison.isGap(c1))
1195    {
1196  0 matchedSeqCount++;
1197  0 output.append("|");
1198    }
1199  0 else if (type.equals("pep"))
1200    {
1201  0 if (pam250.getPairwiseScore(c1, c2) > 0)
1202    {
1203  0 output.append(".");
1204    }
1205    else
1206    {
1207  0 output.append(" ");
1208    }
1209    }
1210    else
1211    {
1212  0 output.append(" ");
1213    }
1214    }
1215    } catch (IndexOutOfBoundsException e)
1216    {
1217  0 continue;
1218    }
1219    }
1220    // Now print the second aligned sequence
1221  0 output = output.append(NEWLINE);
1222  0 output = output.append(new Format("%" + (maxid) + "s").form(strName))
1223    .append(" ");
1224  0 for (int i = 0; i < len; i++)
1225    {
1226  0 if ((i + (j * len)) < strRes.length())
1227    {
1228  0 output.append(strRes.charAt(i + (j * len)));
1229    }
1230    }
1231  0 output.append(NEWLINE).append(NEWLINE);
1232    }
1233  0 float pid = (float) matchedSeqCount / seqRes.length() * 100;
1234  0 if (pid < SiftsSettings.getFailSafePIDThreshold())
1235    {
1236  0 throw new SiftsException(">>> Low PID detected for SIFTs mapping...");
1237    }
1238  0 output.append("Length of alignment = " + seqRes.length())
1239    .append(NEWLINE);
1240  0 output.append(new Format("Percentage ID = %2.2f").form(pid));
1241  0 return output;
1242    }
1243   
 
1244  0 toggle @Override
1245    public int getEntityCount()
1246    {
1247  0 return siftsEntry.getEntity().size();
1248    }
1249   
 
1250  0 toggle @Override
1251    public String getDbAccessionId()
1252    {
1253  0 return siftsEntry.getDbAccessionId();
1254    }
1255   
 
1256  0 toggle @Override
1257    public String getDbCoordSys()
1258    {
1259  0 return siftsEntry.getDbCoordSys();
1260    }
1261   
 
1262  0 toggle @Override
1263    public String getDbSource()
1264    {
1265  0 return siftsEntry.getDbSource();
1266    }
1267   
 
1268  0 toggle @Override
1269    public String getDbVersion()
1270    {
1271  0 return siftsEntry.getDbVersion();
1272    }
1273   
 
1274  0 toggle public static void setMockSiftsFile(File file)
1275    {
1276  0 mockSiftsFile = file;
1277    }
1278   
1279    }