Clover icon

Coverage Report

  1. Project Clover database Wed Dec 3 2025 17:03:17 GMT
  2. Package jalview.ws.sifts

File SiftsClient.java

 

Coverage histogram

../../../img/srcFileCovDistChart1.png
57% of files have more coverage

Code metrics

136
405
51
5
1,282
953
145
0.36
7.94
10.2
2.84

Classes

Class Line # Actions
SiftsClient 82 382 127
0.067150646.7%
SiftsClient.CoordinateSys 131 2 2
0.550%
SiftsClient.ResidueDetailType 147 2 2
0.00%
SiftsClient.SiftsEntitySortPojo 1007 1 1
0.00%
SiftsClient.SegmentHelperPojo 1025 18 13
0.00%
 

Contributing tests

This file is covered by 1 test. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.ws.sifts;
22   
23    import java.io.File;
24    import java.io.FileInputStream;
25    import java.io.FileOutputStream;
26    import java.io.IOException;
27    import java.io.InputStream;
28    import java.io.PrintStream;
29    import java.net.URL;
30    import java.net.URLConnection;
31    import java.nio.file.Files;
32    import java.nio.file.Path;
33    import java.nio.file.attribute.BasicFileAttributes;
34    import java.util.ArrayList;
35    import java.util.Arrays;
36    import java.util.Collection;
37    import java.util.Collections;
38    import java.util.Date;
39    import java.util.HashMap;
40    import java.util.HashSet;
41    import java.util.List;
42    import java.util.Locale;
43    import java.util.Map;
44    import java.util.Set;
45    import java.util.TreeMap;
46    import java.util.zip.GZIPInputStream;
47   
48    import javax.xml.bind.JAXBContext;
49    import javax.xml.bind.JAXBElement;
50    import javax.xml.bind.Unmarshaller;
51    import javax.xml.stream.XMLInputFactory;
52    import javax.xml.stream.XMLStreamReader;
53   
54    import jalview.analysis.AlignSeq;
55    import jalview.analysis.scoremodels.ScoreMatrix;
56    import jalview.analysis.scoremodels.ScoreModels;
57    import jalview.api.DBRefEntryI;
58    import jalview.api.SiftsClientI;
59    import jalview.bin.Console;
60    import jalview.datamodel.DBRefEntry;
61    import jalview.datamodel.DBRefSource;
62    import jalview.datamodel.SequenceI;
63    import jalview.io.BackupFiles;
64    import jalview.io.StructureFile;
65    import jalview.schemes.ResidueProperties;
66    import jalview.structure.StructureMapping;
67    import jalview.util.Comparison;
68    import jalview.util.DBRefUtils;
69    import jalview.util.Format;
70    import jalview.util.HttpUtils;
71    import jalview.util.Platform;
72    import jalview.xml.binding.sifts.Entry;
73    import jalview.xml.binding.sifts.Entry.Entity;
74    import jalview.xml.binding.sifts.Entry.Entity.Segment;
75    import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion;
76    import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue;
77    import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb;
78    import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail;
79    import mc_view.Atom;
80    import mc_view.PDBChain;
81   
 
82    public class SiftsClient implements SiftsClientI
83    {
84    /*
85    * for use in mocking out file fetch for tests only
86    * - reset to null after testing!
87    */
88    private static File mockSiftsFile;
89   
// SIFTS entry unmarshalled by the constructor for this client's PDB id
90    private Entry siftsEntry;
91   
// structure file supplied to the constructor; used to look up chains and their atoms
92    private StructureFile pdb;
93   
// id of the structure, taken from pdb.getId() in the constructor
94    private String pdbId;
95   
// label for the structure being mapped: pdbId, or pdbId + "|" + chain when a chain is given
96    private String structId;
97   
// coordinate system of the sequence being mapped; getGreedyMapping switches it to PDB
// when the source DBRef is a PDB reference
98    private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT;
99   
100    /**
101    * PDB sequence position to sequence coordinate mapping as derived from SIFTS
102    * record for the identified SeqCoordSys Used for lift-over from sequence
103    * derived from PDB (with first extracted PDBRESNUM as 'start' to the sequence
104    * being annotated with PDB data
105    */
106    private jalview.datamodel.Mapping seqFromPdbMapping;
107   
// NOTE(review): not referenced in the visible part of this file - confirm it is still needed
108    private static final int BUFFER_SIZE = 4096;
109   
// sentinel for "no value" in residue numbers, sequence indices and atom indices
110    public static final int UNASSIGNED = Integer.MIN_VALUE;
111   
// index into each int[] mapping value: PDB residue number
112    private static final int PDB_RES_POS = 0;
113   
// index into each int[] mapping value: atom index, filled in by populateAtomPositions
114    private static final int PDB_ATOM_POS = 1;
115   
// index into each int[] mapping value: running residue position set by processSegments
116    private static final int PDBE_POS = 2;
117   
// residue annotation value (compared ignoring case) that marks a residue as not observed
118    private static final String NOT_OBSERVED = "Not_Observed";
119   
// base URL for per-entry SIFTS XML downloads; see getDownloadUrlFor
120    private static final String SIFTS_SPLIT_FTP_BASE_URL = "https://ftp.ebi.ac.uk/pub/databases/msd/sifts/split_xml/";
121   
122    private final static String NEWLINE = System.lineSeparator();
123   
// values for downloadSifts' asFile flag: return the open stream ...
124    private static final boolean GET_STREAM = false;
// ... or save the download to the SIFTS download directory and return the File
125    private static final boolean CACHE_FILE = true;
// accession id of the source DBRef chosen by the latest getGreedyMapping call
126    private String curSourceDBRef;
127   
// lower-cased accession ids of the sequence's DBRefs from the latest getGreedyMapping call
128    private HashSet<String> curDBRefAccessionIdsString;
// when true the constructor reads SIFTS data via the local file cache; when false it streams
// the download directly. No assignment other than this initialiser is visible in this file.
129    private boolean doCache = false;
130   
 
131    private enum CoordinateSys
132    {
133    UNIPROT("UniProt"), PDB("PDBresnum"), PDBe("PDBe");
134    private String name;
135   
 
136  12 toggle private CoordinateSys(String name)
137    {
138  12 this.name = name;
139    }
140   
 
141  0 toggle public String getName()
142    {
143  0 return name;
144    }
145    };
146   
 
147    private enum ResidueDetailType
148    {
149    NAME_SEC_STRUCTURE("nameSecondaryStructure"),
150    CODE_SEC_STRUCTURE("codeSecondaryStructure"), ANNOTATION("Annotation");
151    private String code;
152   
 
153  0 toggle private ResidueDetailType(String code)
154    {
155  0 this.code = code;
156    }
157   
 
158  0 toggle public String getCode()
159    {
160  0 return code;
161    }
162    };
163   
/**
 * Creates a client for the given structure and loads the SIFTS entry for
 * its id. Depending on {@code doCache} the entry is read either from the
 * locally cached SIFTS file or straight from the download stream.
 *
 * @param pdb
 *          the structure file whose id identifies the SIFTS entry
 * @throws SiftsException
 *           if the SIFTS data cannot be fetched or parsed
 */
public SiftsClient(StructureFile pdb) throws SiftsException
{
  this.pdb = pdb;
  pdbId = pdb.getId();
  siftsEntry = doCache ? parseSIFTs(getSiftsFile(pdbId))
          : parseSIFTSStreamFor(pdbId);
}
182   
183    /**
184    * A more streamlined version of SIFT reading that allows for streaming of the data.
185    *
186    * @param pdbId
187    * @return
188    * @throws SiftsException
189    */
 
190  4 toggle private static Entry parseSIFTSStreamFor(String pdbId) throws SiftsException
191    {
192  4 try
193    {
194  4 InputStream is = (InputStream) downloadSifts(pdbId, GET_STREAM);
195  4 return parseSIFTs(is);
196    } catch (Exception e)
197    {
198  0 throw new SiftsException(e.getMessage());
199    }
200    }
201   
202    /**
203    * Parse the given SIFTs File and return a JAXB POJO of parsed data
204    *
205    * @param siftFile
206    * - the GZipped SIFTs XML file to parse
207    * @return
208    * @throws Exception
209    * if a problem occurs while parsing the SIFTs XML
210    */
 
211  0 toggle private Entry parseSIFTs(File siftFile) throws SiftsException
212    {
213  0 try (InputStream in = new FileInputStream(siftFile)) {
214  0 return parseSIFTs(in);
215    } catch (Exception e)
216    {
217  0 e.printStackTrace();
218  0 throw new SiftsException(e.getMessage());
219    }
220    }
221   
 
222  4 toggle private static Entry parseSIFTs(InputStream in) throws Exception {
223  4 try (GZIPInputStream gzis = new GZIPInputStream(in);)
224    {
225    // jalview.bin.Console.outPrintln("File : " + siftFile.getAbsolutePath());
226  4 JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts");
227  4 XMLStreamReader streamReader = XMLInputFactory.newInstance()
228    .createXMLStreamReader(gzis);
229  4 Unmarshaller um = jc.createUnmarshaller();
230  4 JAXBElement<Entry> jbe = um.unmarshal(streamReader, Entry.class);
231  4 return jbe.getValue();
232    } catch (Exception e)
233    {
234  0 e.printStackTrace();
235  0 throw new SiftsException(e.getMessage());
236    }
237    }
238   
239    /**
240    * Get a SIFTs XML file for a given PDB Id from Cache or download from FTP
241    * repository if not found in cache
242    *
243    * @param pdbId
244    * @return SIFTs XML file
245    * @throws SiftsException
246    */
 
247  0 toggle public static File getSiftsFile(String pdbId) throws SiftsException
248    {
249    /*
250    * return mocked file if it has been set
251    */
252  0 if (mockSiftsFile != null)
253    {
254  0 return mockSiftsFile;
255    }
256   
257  0 String siftsFileName = SiftsSettings.getSiftDownloadDirectory()
258    + pdbId.toLowerCase(Locale.ROOT) + ".xml.gz";
259  0 File siftsFile = new File(siftsFileName);
260  0 if (siftsFile.exists())
261    {
262    // The line below is required for unit testing... don't comment it out!!!
263  0 jalview.bin.Console
264    .outPrintln(">>> SIFTS File already downloaded for " + pdbId);
265   
266  0 if (Platform.isFileOlderThanThreshold(siftsFile,
267    SiftsSettings.getCacheThresholdInDays()))
268    {
269  0 File oldSiftsFile = new File(siftsFileName + "_old");
270  0 BackupFiles.moveFileToFile(siftsFile, oldSiftsFile);
271  0 try
272    {
273  0 siftsFile = downloadSiftsFile(pdbId.toLowerCase(Locale.ROOT));
274  0 oldSiftsFile.delete();
275  0 return siftsFile;
276    } catch (IOException e)
277    {
278  0 e.printStackTrace();
279  0 BackupFiles.moveFileToFile(oldSiftsFile, siftsFile);
280  0 return new File(siftsFileName);
281    }
282    }
283    else
284    {
285  0 return siftsFile;
286    }
287    }
288  0 try
289    {
290  0 siftsFile = downloadSiftsFile(pdbId.toLowerCase(Locale.ROOT));
291    } catch (IOException e)
292    {
293  0 throw new SiftsException(e.getMessage());
294    }
295  0 return siftsFile;
296    }
297   
298    /**
299    * Download a SIFTs XML file for a given PDB Id from an FTP repository
300    *
301    * @param pdbId
302    * @return downloaded SIFTs XML file
303    * @throws SiftsException
304    * @throws IOException
305    */
 
306  0 toggle public static File downloadSiftsFile(String pdbId)
307    throws SiftsException, IOException
308    {
309  0 return (File) downloadSifts(pdbId, CACHE_FILE);
310    }
311   
312    /**
313    * Download SIFTs XML with the option to cache a file or to get a stream.
314    *
315    * @param pdbId
316    * @param asFile
317    * @return
318    * @throws IOException
319    */
 
320  4 toggle private static Object downloadSifts(String pdbId, boolean asFile) throws IOException
321    {
322  4 pdbId = pdbId.toLowerCase(Locale.ROOT);
323  4 if (pdbId.contains(".cif"))
324    {
325  0 pdbId = pdbId.replace(".cif", "");
326    }
327  4 String siftFile = pdbId + ".xml.gz";
328  4 String siftsFileFTPURL = getDownloadUrlFor(siftFile);
329   
330    /*
331    * Download the file from URL to either
332    * Java: directory of cached downloaded SIFTS files
333    * Javascript: temporary 'file' (in-memory cache)
334    */
335  4 File downloadTo = null;
336  4 if (asFile)
337    {
338  0 downloadTo = new File(
339    SiftsSettings.getSiftDownloadDirectory() + siftFile);
340  0 File siftsDownloadDir = new File(SiftsSettings.getSiftDownloadDirectory());
341  0 if (!siftsDownloadDir.exists())
342    {
343  0 siftsDownloadDir.mkdirs();
344    }
345    }
346  4 URL url = new URL(siftsFileFTPURL);
347  4 URLConnection conn = url.openConnection();
348  4 InputStream is = conn.getInputStream();
349  4 if (!asFile)
350  4 return is;
351    // This is MUCH more efficent in JavaScript, as we already have the bytes
352  0 Platform.streamToFile(is, downloadTo);
353  0 is.close();
354  0 return downloadTo;
355    }
356   
 
357  5 toggle public static String getDownloadUrlFor(String siftFile)
358    {
359  5 String durl = SIFTS_SPLIT_FTP_BASE_URL + siftFile.substring(1, 3) + "/"
360    + siftFile;
361  5 Console.trace("SIFTS URL for " + siftFile + " is " + durl);
362  5 return durl;
363   
364    }
365   
366    /**
367    * Delete the SIFTs file for the given PDB Id in the local SIFTs download
368    * directory
369    *
370    * @param pdbId
371    * @return true if the file was deleted or doesn't exist
372    */
 
373  0 toggle public static boolean deleteSiftsFileByPDBId(String pdbId)
374    {
375  0 File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory()
376    + pdbId.toLowerCase(Locale.ROOT) + ".xml.gz");
377  0 if (siftsFile.exists())
378    {
379  0 return siftsFile.delete();
380    }
381  0 return true;
382    }
383   
384    /**
385    * Get a valid SIFTs DBRef for the given sequence current SIFTs entry
386    *
387    * @param seq
388    * - the target sequence for the operation
389    * @return a valid DBRefEntry that is SIFTs compatible
390    * @throws Exception
391    * if no valid source DBRefEntry was found for the given sequences
392    */
 
393  0 toggle public DBRefEntryI getValidSourceDBRef(SequenceI seq)
394    throws SiftsException
395    {
396  0 List<DBRefEntry> dbRefs = seq.getPrimaryDBRefs();
397  0 if (dbRefs == null || dbRefs.size() < 1)
398    {
399  0 throw new SiftsException(
400    "Source DBRef could not be determined. DBRefs might not have been retrieved.");
401    }
402   
403  0 for (DBRefEntry dbRef : dbRefs)
404    {
405  0 if (dbRef == null || dbRef.getAccessionId() == null
406    || dbRef.getSource() == null)
407    {
408  0 continue;
409    }
410  0 String canonicalSource = DBRefUtils
411    .getCanonicalName(dbRef.getSource());
412  0 if (isValidDBRefEntry(dbRef)
413    && (canonicalSource.equalsIgnoreCase(DBRefSource.UNIPROT)
414    || canonicalSource.equalsIgnoreCase(DBRefSource.PDB)))
415    {
416  0 return dbRef;
417    }
418    }
419  0 throw new SiftsException("Could not get source DB Ref");
420    }
421   
422    /**
423    * Check that the DBRef Entry is properly populated and is available in this
424    * SiftClient instance
425    *
426    * @param entry
427    * - DBRefEntry to validate
428    * @return true validation is successful otherwise false is returned.
429    */
 
430  0 toggle boolean isValidDBRefEntry(DBRefEntryI entry)
431    {
432  0 return entry != null && entry.getAccessionId() != null
433    && isFoundInSiftsEntry(entry.getAccessionId());
434    }
435   
 
436  0 toggle @Override
437    public HashSet<String> getAllMappingAccession()
438    {
439  0 HashSet<String> accessions = new HashSet<String>();
440  0 List<Entity> entities = siftsEntry.getEntity();
441  0 for (Entity entity : entities)
442    {
443  0 List<Segment> segments = entity.getSegment();
444  0 for (Segment segment : segments)
445    {
446  0 List<MapRegion> mapRegions = segment.getListMapRegion()
447    .getMapRegion();
448  0 for (MapRegion mapRegion : mapRegions)
449    {
450  0 accessions.add(mapRegion.getDb().getDbAccessionId()
451    .toLowerCase(Locale.ROOT));
452    }
453    }
454    }
455  0 return accessions;
456    }
457   
 
458  0 toggle @Override
459    public StructureMapping getSiftsStructureMapping(SequenceI seq,
460    String pdbFile, String chain) throws SiftsException
461    {
462  0 SequenceI aseq = seq;
463  0 while (seq.getDatasetSequence() != null)
464    {
465  0 seq = seq.getDatasetSequence();
466    }
467  0 structId = (chain == null) ? pdbId : pdbId + "|" + chain;
468  0 jalview.bin.Console.outPrintln("Getting SIFTS mapping for " + structId
469    + ": seq " + seq.getName());
470   
471  0 final StringBuilder mappingDetails = new StringBuilder(128);
472  0 PrintStream ps = new PrintStream(System.out)
473    {
 
474  0 toggle @Override
475    public void print(String x)
476    {
477  0 mappingDetails.append(x);
478    }
479   
 
480  0 toggle @Override
481    public void println()
482    {
483  0 mappingDetails.append(NEWLINE);
484    }
485    };
486  0 HashMap<Integer, int[]> mapping = getGreedyMapping(chain, seq, ps);
487   
488  0 String mappingOutput = mappingDetails.toString();
489  0 StructureMapping siftsMapping = new StructureMapping(aseq, pdbFile,
490    pdbId, chain, mapping, mappingOutput, seqFromPdbMapping);
491   
492  0 return siftsMapping;
493    }
494   
 
495  0 toggle @Override
496    public HashMap<Integer, int[]> getGreedyMapping(String entityId,
497    SequenceI seq, java.io.PrintStream os) throws SiftsException
498    {
499  0 List<Integer> omitNonObserved = new ArrayList<>();
500  0 int nonObservedShiftIndex = 0, pdbeNonObserved = 0;
501    // jalview.bin.Console.outPrintln("Generating mappings for : " + entityId);
502  0 Entity entity = null;
503  0 entity = getEntityById(entityId);
504  0 String originalSeq = AlignSeq.extractGaps(
505    jalview.util.Comparison.GapChars, seq.getSequenceAsString());
506  0 HashMap<Integer, int[]> mapping = new HashMap<Integer, int[]>();
507  0 DBRefEntryI sourceDBRef;
508  0 sourceDBRef = getValidSourceDBRef(seq);
509    // TODO ensure sequence start/end is in the same coordinate system and
510    // consistent with the choosen sourceDBRef
511   
512    // set sequence coordinate system - default value is UniProt
513  0 if (sourceDBRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
514    {
515  0 seqCoordSys = CoordinateSys.PDB;
516    }
517   
518  0 HashSet<String> dbRefAccessionIdsString = new HashSet<String>();
519  0 for (DBRefEntry dbref : seq.getDBRefs())
520    {
521  0 dbRefAccessionIdsString
522    .add(dbref.getAccessionId().toLowerCase(Locale.ROOT));
523    }
524  0 dbRefAccessionIdsString
525    .add(sourceDBRef.getAccessionId().toLowerCase(Locale.ROOT));
526   
527  0 curDBRefAccessionIdsString = dbRefAccessionIdsString;
528  0 curSourceDBRef = sourceDBRef.getAccessionId();
529   
530  0 TreeMap<Integer, String> resNumMap = new TreeMap<Integer, String>();
531  0 List<Segment> segments = entity.getSegment();
532  0 SegmentHelperPojo shp = new SegmentHelperPojo(seq, mapping, resNumMap,
533    omitNonObserved, nonObservedShiftIndex,pdbeNonObserved);
534  0 processSegments(segments, shp);
535  0 try
536    {
537  0 populateAtomPositions(entityId, mapping);
538    } catch (Exception e)
539    {
540  0 e.printStackTrace();
541    }
542  0 if (seqCoordSys == CoordinateSys.UNIPROT)
543    {
544  0 padWithGaps(resNumMap, omitNonObserved);
545    }
546  0 int seqStart = UNASSIGNED;
547  0 int seqEnd = UNASSIGNED;
548  0 int pdbStart = UNASSIGNED;
549  0 int pdbEnd = UNASSIGNED;
550   
551  0 if (mapping.isEmpty())
552    {
553  0 throw new SiftsException("SIFTS mapping failed for " + entityId
554    + " and " + seq.getName());
555    }
556    // also construct a mapping object between the seq-coord sys and the PDB seq's coord sys
557   
558  0 Integer[] keys = mapping.keySet().toArray(new Integer[0]);
559  0 Arrays.sort(keys);
560  0 seqStart = keys[0];
561  0 seqEnd = keys[keys.length - 1];
562  0 List<int[]> from=new ArrayList<>(),to=new ArrayList<>();
563  0 int[]_cfrom=null,_cto=null;
564  0 String matchedSeq = originalSeq;
565  0 if (seqStart != UNASSIGNED) // fixme! seqStart can map to -1 for a pdb sequence that starts <-1
566    {
567  0 for (int seqps:keys)
568    {
569  0 int pdbpos = mapping.get(seqps)[PDBE_POS];
570  0 if (pdbpos == UNASSIGNED)
571    {
572    // not correct - pdbpos might be -1, but leave it for now
573  0 continue;
574    }
575  0 if (_cfrom==null || seqps!=_cfrom[1]+1)
576    {
577  0 _cfrom = new int[] { seqps,seqps};
578  0 from.add(_cfrom);
579  0 _cto = null; // discontinuity
580    } else {
581  0 _cfrom[1]= seqps;
582    }
583  0 if (_cto==null || pdbpos!=1+_cto[1])
584    {
585  0 _cto = new int[] { pdbpos,pdbpos};
586  0 to.add(_cto);
587    } else {
588  0 _cto[1] = pdbpos;
589    }
590    }
591  0 _cfrom = new int[from.size() * 2];
592  0 _cto = new int[to.size() * 2];
593  0 int p = 0;
594  0 for (int[] range : from)
595    {
596  0 _cfrom[p++] = range[0];
597  0 _cfrom[p++] = range[1];
598    }
599  0 ;
600  0 p = 0;
601  0 for (int[] range : to)
602    {
603  0 _cto[p++] = range[0];
604  0 _cto[p++] = range[1];
605    }
606  0 ;
607   
608  0 seqFromPdbMapping = new jalview.datamodel.Mapping(null, _cto, _cfrom,
609    1,
610    1);
611  0 pdbStart = mapping.get(seqStart)[PDB_RES_POS];
612  0 pdbEnd = mapping.get(seqEnd)[PDB_RES_POS];
613  0 int orignalSeqStart = seq.getStart();
614  0 if (orignalSeqStart >= 1)
615    {
616  0 int subSeqStart = (seqStart >= orignalSeqStart)
617    ? seqStart - orignalSeqStart
618    : 0;
619  0 int subSeqEnd = seqEnd - (orignalSeqStart - 1);
620  0 subSeqEnd = originalSeq.length() < subSeqEnd ? originalSeq.length()
621    : subSeqEnd;
622  0 matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd);
623    }
624    else
625    {
626  0 matchedSeq = originalSeq.substring(1, originalSeq.length());
627    }
628    }
629   
630  0 StringBuilder targetStrucSeqs = new StringBuilder();
631  0 for (String res : resNumMap.values())
632    {
633  0 targetStrucSeqs.append(res);
634    }
635   
636  0 if (os != null)
637    {
638  0 MappingOutputPojo mop = new MappingOutputPojo();
639  0 mop.setSeqStart(seqStart);
640  0 mop.setSeqEnd(seqEnd);
641  0 mop.setSeqName(seq.getName());
642  0 mop.setSeqResidue(matchedSeq);
643   
644  0 mop.setStrStart(pdbStart);
645  0 mop.setStrEnd(pdbEnd);
646  0 mop.setStrName(structId);
647  0 mop.setStrResidue(targetStrucSeqs.toString());
648   
649  0 mop.setType("pep");
650  0 os.print(getMappingOutput(mop).toString());
651  0 os.println();
652    }
653  0 return mapping;
654    }
655   
 
656  0 toggle void processSegments(List<Segment> segments, SegmentHelperPojo shp)
657    {
658  0 SequenceI seq = shp.getSeq();
659  0 HashMap<Integer, int[]> mapping = shp.getMapping();
660  0 TreeMap<Integer, String> resNumMap = shp.getResNumMap();
661  0 List<Integer> omitNonObserved = shp.getOmitNonObserved();
662  0 int nonObservedShiftIndex = shp.getNonObservedShiftIndex();
663  0 int pdbeNonObservedCount = shp.getPdbeNonObserved();
664  0 int firstPDBResNum = UNASSIGNED;
665  0 for (Segment segment : segments)
666    {
667    // jalview.bin.Console.outPrintln("Mapping segments : " +
668    // segment.getSegId() + "\\"s
669    // + segStartEnd);
670  0 List<Residue> residues = segment.getListResidue().getResidue();
671  0 for (Residue residue : residues)
672    {
673  0 boolean isObserved = isResidueObserved(residue);
674  0 int pdbeIndex = Platform.getLeadingIntegerValue(residue.getDbResNum(),
675    UNASSIGNED);
676  0 int currSeqIndex = UNASSIGNED;
677  0 List<CrossRefDb> cRefDbs = residue.getCrossRefDb();
678  0 CrossRefDb pdbRefDb = null;
679  0 for (CrossRefDb cRefDb : cRefDbs)
680    {
681  0 if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB))
682    {
683  0 pdbRefDb = cRefDb;
684  0 if (firstPDBResNum == UNASSIGNED)
685    {
686  0 firstPDBResNum = Platform.getLeadingIntegerValue(cRefDb.getDbResNum(),
687    UNASSIGNED);
688    }
689    else
690    {
691  0 if (isObserved)
692    {
693    // after we find the first observed residue we just increment
694  0 firstPDBResNum++;
695    }
696    }
697    }
698  0 if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys.getName())
699    && isAccessionMatched(cRefDb.getDbAccessionId()))
700    {
701  0 currSeqIndex = Platform.getLeadingIntegerValue(cRefDb.getDbResNum(),
702    UNASSIGNED);
703  0 if (pdbRefDb != null)
704    {
705  0 break;// exit loop if pdb and uniprot are already found
706    }
707    }
708    }
709  0 if (!isObserved)
710    {
711  0 ++pdbeNonObservedCount;
712    }
713  0 if (seqCoordSys == seqCoordSys.PDB) // FIXME: is seqCoordSys ever PDBe
714    // ???
715    {
716    // if the sequence has a primary reference to the PDB, then we are
717    // dealing with a sequence extracted directly from the PDB. In that
718    // case, numbering is PDBe - non-observed residues
719  0 currSeqIndex = seq.getStart() - 1 + pdbeIndex;
720    }
721  0 if (!isObserved)
722    {
723  0 if (seqCoordSys != CoordinateSys.UNIPROT) // FIXME: PDB or PDBe only
724    // here
725    {
726    // mapping to PDB or PDBe so we need to bookkeep for the
727    // non-observed
728    // SEQRES positions
729  0 omitNonObserved.add(currSeqIndex);
730  0 ++nonObservedShiftIndex;
731    }
732    }
733  0 if (currSeqIndex == UNASSIGNED)
734    {
735    // change in logic - unobserved residues with no currSeqIndex
736    // corresponding are still counted in both nonObservedShiftIndex and
737    // pdbeIndex...
738  0 continue;
739    }
740    // if (currSeqIndex >= seq.getStart() && currSeqIndex <= seqlength) //
741    // true
742    // numbering
743    // is
744    // not
745    // up
746    // to
747    // seq.getEnd()
748    {
749   
750  0 int resNum = (pdbRefDb == null)
751    ? Platform.getLeadingIntegerValue(residue.getDbResNum(),
752    UNASSIGNED)
753    : Platform.getLeadingIntegerValue(pdbRefDb.getDbResNum(),
754    UNASSIGNED);
755   
756  0 if (isObserved)
757    {
758  0 char resCharCode = ResidueProperties
759    .getSingleCharacterCode(ResidueProperties
760    .getCanonicalAminoAcid(residue.getDbResName()));
761  0 resNumMap.put(currSeqIndex, String.valueOf(resCharCode));
762   
763  0 int[] mappingcols = new int[] { Integer.valueOf(resNum),
764  0 UNASSIGNED, isObserved ? firstPDBResNum : UNASSIGNED };
765   
766  0 mapping.put(currSeqIndex - nonObservedShiftIndex, mappingcols);
767    }
768    }
769    }
770    }
771    }
772   
773    /**
774    *
775    * @param chainId
776    * Target chain to populate mapping of its atom positions.
777    * @param mapping
778    * Two dimension array of residue index versus atom position
779    * @throws IllegalArgumentException
780    * Thrown if chainId or mapping is null
781    * @throws SiftsException
782    */
 
783  0 toggle void populateAtomPositions(String chainId, Map<Integer, int[]> mapping)
784    throws IllegalArgumentException, SiftsException
785    {
786  0 try
787    {
788  0 PDBChain chain = pdb.findChain(chainId);
789   
790  0 if (chain == null || mapping == null)
791    {
792  0 throw new IllegalArgumentException(
793    "Chain id or mapping must not be null.");
794    }
795  0 for (int[] map : mapping.values())
796    {
797  0 if (map[PDB_RES_POS] != UNASSIGNED)
798    {
799  0 map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms);
800    }
801    }
802    } catch (NullPointerException e)
803    {
804  0 throw new SiftsException(e.getMessage());
805    } catch (Exception e)
806    {
807  0 throw new SiftsException(e.getMessage());
808    }
809    }
810   
811    /**
812    *
813    * @param residueIndex
814    * The residue index used for the search
815    * @param atoms
816    * A collection of Atom to search
817    * @return atom position for the given residue index
818    */
 
819  0 toggle int getAtomIndex(int residueIndex, Collection<Atom> atoms)
820    {
821  0 if (atoms == null)
822    {
823  0 throw new IllegalArgumentException(
824    "atoms collection must not be null!");
825    }
826  0 for (Atom atom : atoms)
827    {
828  0 if (atom.resNumber == residueIndex)
829    {
830  0 return atom.atomIndex;
831    }
832    }
833  0 return UNASSIGNED;
834    }
835   
836    /**
837    * Checks if the residue instance is marked 'Not_observed' or not
838    *
839    * @param residue
840    * @return
841    */
 
842  0 toggle private boolean isResidueObserved(Residue residue)
843    {
844  0 Set<String> annotations = getResidueAnnotaitons(residue,
845    ResidueDetailType.ANNOTATION);
846  0 if (annotations == null || annotations.isEmpty())
847    {
848  0 return true;
849    }
850  0 for (String annotation : annotations)
851    {
852  0 if (annotation.equalsIgnoreCase(NOT_OBSERVED))
853    {
854  0 return false;
855    }
856    }
857  0 return true;
858    }
859   
860    /**
861    * Get annotation String for a given residue and annotation type
862    *
863    * @param residue
864    * @param type
865    * @return
866    */
 
867  0 toggle private Set<String> getResidueAnnotaitons(Residue residue,
868    ResidueDetailType type)
869    {
870  0 HashSet<String> foundAnnotations = new HashSet<String>();
871  0 List<ResidueDetail> resDetails = residue.getResidueDetail();
872  0 for (ResidueDetail resDetail : resDetails)
873    {
874  0 if (resDetail.getProperty().equalsIgnoreCase(type.getCode()))
875    {
876  0 foundAnnotations.add(resDetail.getContent());
877    }
878    }
879  0 return foundAnnotations;
880    }
881   
 
882  0 toggle @Override
883    public boolean isAccessionMatched(String accession)
884    {
885  0 boolean isStrictMatch = true;
886  0 return isStrictMatch ? curSourceDBRef.equalsIgnoreCase(accession)
887    : curDBRefAccessionIdsString
888    .contains(accession.toLowerCase(Locale.ROOT));
889    }
890   
 
891  0 toggle private boolean isFoundInSiftsEntry(String accessionId)
892    {
893  0 Set<String> siftsDBRefs = getAllMappingAccession();
894  0 return accessionId != null
895    && siftsDBRefs.contains(accessionId.toLowerCase(Locale.ROOT));
896    }
897   
898    /**
899    * Pad omitted residue positions in PDB sequence with gaps
900    *
901    * @param resNumMap
902    */
 
903  0 toggle void padWithGaps(Map<Integer, String> resNumMap,
904    List<Integer> omitNonObserved)
905    {
906  0 if (resNumMap == null || resNumMap.isEmpty())
907    {
908  0 return;
909    }
910  0 Integer[] keys = resNumMap.keySet().toArray(new Integer[0]);
911    // Arrays.sort(keys);
912  0 int firstIndex = keys[0];
913  0 int lastIndex = keys[keys.length - 1];
914    // jalview.bin.Console.outPrintln("Min value " + firstIndex);
915    // jalview.bin.Console.outPrintln("Max value " + lastIndex);
916  0 for (int x = firstIndex; x <= lastIndex; x++)
917    {
918  0 if (!resNumMap.containsKey(x) && !omitNonObserved.contains(x))
919    {
920  0 resNumMap.put(x, "-");
921    }
922    }
923    }
924   
 
925  0 toggle @Override
926    public Entity getEntityById(String id) throws SiftsException
927    {
928    // Determines an entity to process by performing a heuristic matching of all
929    // Entities with the given chainId and choosing the best matching Entity
930  0 Entity entity = getEntityByMostOptimalMatchedId(id);
931  0 if (entity != null)
932    {
933  0 return entity;
934    }
935  0 throw new SiftsException("Entity " + id + " not found");
936    }
937   
938    /**
939    * This method was added because EntityId is NOT always equal to ChainId.
940    * Hence, it provides the logic to greedily detect the "true" Entity for a
941    * given chainId where discrepancies exist.
942    *
943    * @param chainId
944    * @return
945    */
 
946  0 toggle public Entity getEntityByMostOptimalMatchedId(String chainId)
947    {
948    // jalview.bin.Console.outPrintln("---> advanced greedy entityId matching
949    // block
950    // entered..");
951  0 List<Entity> entities = siftsEntry.getEntity();
952  0 SiftsEntitySortPojo[] sPojo = new SiftsEntitySortPojo[entities.size()];
953  0 int count = 0;
954  0 for (Entity entity : entities)
955    {
956  0 sPojo[count] = new SiftsEntitySortPojo();
957  0 sPojo[count].entityId = entity.getEntityId();
958   
959  0 List<Segment> segments = entity.getSegment();
960  0 for (Segment segment : segments)
961    {
962  0 List<Residue> residues = segment.getListResidue().getResidue();
963  0 for (Residue residue : residues)
964    {
965  0 List<CrossRefDb> cRefDbs = residue.getCrossRefDb();
966  0 for (CrossRefDb cRefDb : cRefDbs)
967    {
968  0 if (!cRefDb.getDbSource().equalsIgnoreCase("PDB"))
969    {
970  0 continue;
971    }
972  0 ++sPojo[count].resCount;
973  0 if (cRefDb.getDbChainId().equalsIgnoreCase(chainId))
974    {
975  0 ++sPojo[count].chainIdFreq;
976    }
977    }
978    }
979    }
980  0 sPojo[count].pid = (100 * sPojo[count].chainIdFreq)
981    / sPojo[count].resCount;
982  0 ++count;
983    }
984  0 Arrays.sort(sPojo, Collections.reverseOrder());
985    // jalview.bin.Console.outPrintln("highest matched entity : " +
986    // sPojo[0].entityId);
987    // jalview.bin.Console.outPrintln("highest matched pid : " + sPojo[0].pid);
988   
989  0 if (sPojo[0].entityId != null)
990    {
991  0 if (sPojo[0].pid < 1)
992    {
993  0 return null;
994    }
995  0 for (Entity entity : entities)
996    {
997  0 if (!entity.getEntityId().equalsIgnoreCase(sPojo[0].entityId))
998    {
999  0 continue;
1000    }
1001  0 return entity;
1002    }
1003    }
1004  0 return null;
1005    }
1006   
 
1007    private class SiftsEntitySortPojo
1008    implements Comparable<SiftsEntitySortPojo>
1009    {
1010    public String entityId;
1011   
1012    public int chainIdFreq;
1013   
1014    public int pid;
1015   
1016    public int resCount;
1017   
 
1018  0 toggle @Override
1019    public int compareTo(SiftsEntitySortPojo o)
1020    {
1021  0 return this.pid - o.pid;
1022    }
1023    }
1024   
 
1025    private class SegmentHelperPojo
1026    {
1027    private SequenceI seq;
1028   
1029    private HashMap<Integer, int[]> mapping;
1030   
1031    private TreeMap<Integer, String> resNumMap;
1032   
1033    private List<Integer> omitNonObserved;
1034   
1035    private int nonObservedShiftIndex;
1036   
1037    /**
1038    * count of number of 'not observed' positions in the PDB record's SEQRES
1039    * (total number of residues with coordinates == length(SEQRES) -
1040    * pdbeNonObserved
1041    */
1042    private int pdbeNonObserved;
1043   
 
1044  0 toggle public SegmentHelperPojo(SequenceI seq, HashMap<Integer, int[]> mapping,
1045    TreeMap<Integer, String> resNumMap,
1046    List<Integer> omitNonObserved, int nonObservedShiftIndex,
1047    int pdbeNonObserved)
1048    {
1049  0 setSeq(seq);
1050  0 setMapping(mapping);
1051  0 setResNumMap(resNumMap);
1052  0 setOmitNonObserved(omitNonObserved);
1053  0 setNonObservedShiftIndex(nonObservedShiftIndex);
1054  0 setPdbeNonObserved(pdbeNonObserved);
1055   
1056    }
1057   
 
1058  0 toggle public void setPdbeNonObserved(int pdbeNonObserved2)
1059    {
1060  0 this.pdbeNonObserved = pdbeNonObserved2;
1061    }
1062   
 
1063  0 toggle public int getPdbeNonObserved()
1064    {
1065  0 return pdbeNonObserved;
1066    }
 
1067  0 toggle public SequenceI getSeq()
1068    {
1069  0 return seq;
1070    }
1071   
 
1072  0 toggle public void setSeq(SequenceI seq)
1073    {
1074  0 this.seq = seq;
1075    }
1076   
 
1077  0 toggle public HashMap<Integer, int[]> getMapping()
1078    {
1079  0 return mapping;
1080    }
1081   
 
1082  0 toggle public void setMapping(HashMap<Integer, int[]> mapping)
1083    {
1084  0 this.mapping = mapping;
1085    }
1086   
 
1087  0 toggle public TreeMap<Integer, String> getResNumMap()
1088    {
1089  0 return resNumMap;
1090    }
1091   
 
1092  0 toggle public void setResNumMap(TreeMap<Integer, String> resNumMap)
1093    {
1094  0 this.resNumMap = resNumMap;
1095    }
1096   
 
1097  0 toggle public List<Integer> getOmitNonObserved()
1098    {
1099  0 return omitNonObserved;
1100    }
1101   
 
1102  0 toggle public void setOmitNonObserved(List<Integer> omitNonObserved)
1103    {
1104  0 this.omitNonObserved = omitNonObserved;
1105    }
1106   
 
1107  0 toggle public int getNonObservedShiftIndex()
1108    {
1109  0 return nonObservedShiftIndex;
1110    }
1111   
 
1112  0 toggle public void setNonObservedShiftIndex(int nonObservedShiftIndex)
1113    {
1114  0 this.nonObservedShiftIndex = nonObservedShiftIndex;
1115    }
1116   
1117    }
1118   
 
1119  0 toggle @Override
1120    public StringBuilder getMappingOutput(MappingOutputPojo mp)
1121    throws SiftsException
1122    {
1123  0 String seqRes = mp.getSeqResidue();
1124  0 String seqName = mp.getSeqName();
1125  0 int sStart = mp.getSeqStart();
1126  0 int sEnd = mp.getSeqEnd();
1127   
1128  0 String strRes = mp.getStrResidue();
1129  0 String strName = mp.getStrName();
1130  0 int pdbStart = mp.getStrStart();
1131  0 int pdbEnd = mp.getStrEnd();
1132   
1133  0 String type = mp.getType();
1134   
1135  0 int maxid = (seqName.length() >= strName.length()) ? seqName.length()
1136    : strName.length();
1137  0 int len = 72 - maxid - 1;
1138   
1139  0 int nochunks = ((seqRes.length()) / len)
1140  0 + ((seqRes.length()) % len > 0 ? 1 : 0);
1141    // output mappings
1142  0 StringBuilder output = new StringBuilder(512);
1143  0 output.append(NEWLINE);
1144  0 output.append("Sequence \u27f7 Structure mapping details")
1145    .append(NEWLINE);
1146  0 output.append("Method: SIFTS");
1147  0 output.append(NEWLINE).append(NEWLINE);
1148   
1149  0 output.append(new Format("%" + maxid + "s").form(seqName));
1150  0 output.append(" : ");
1151  0 output.append(String.valueOf(sStart));
1152  0 output.append(" - ");
1153  0 output.append(String.valueOf(sEnd));
1154  0 output.append(" Maps to ");
1155  0 output.append(NEWLINE);
1156  0 output.append(new Format("%" + maxid + "s").form(structId));
1157  0 output.append(" : ");
1158  0 output.append(String.valueOf(pdbStart));
1159  0 output.append(" - ");
1160  0 output.append(String.valueOf(pdbEnd));
1161  0 output.append(NEWLINE).append(NEWLINE);
1162   
1163  0 ScoreMatrix pam250 = ScoreModels.getInstance().getPam250();
1164  0 int matchedSeqCount = 0;
1165  0 for (int j = 0; j < nochunks; j++)
1166    {
1167    // Print the first aligned sequence
1168  0 output.append(new Format("%" + (maxid) + "s").form(seqName))
1169    .append(" ");
1170   
1171  0 for (int i = 0; i < len; i++)
1172    {
1173  0 if ((i + (j * len)) < seqRes.length())
1174    {
1175  0 output.append(seqRes.charAt(i + (j * len)));
1176    }
1177    }
1178   
1179  0 output.append(NEWLINE);
1180  0 output.append(new Format("%" + (maxid) + "s").form(" ")).append(" ");
1181   
1182    /*
1183    * Print out the match symbols:
1184    * | for exact match (ignoring case)
1185    * . if PAM250 score is positive
1186    * else a space
1187    */
1188  0 for (int i = 0; i < len; i++)
1189    {
1190  0 try
1191    {
1192  0 if ((i + (j * len)) < seqRes.length())
1193    {
1194  0 char c1 = seqRes.charAt(i + (j * len));
1195  0 char c2 = strRes.charAt(i + (j * len));
1196  0 boolean sameChar = Comparison.isSameResidue(c1, c2, false);
1197  0 if (sameChar && !Comparison.isGap(c1))
1198    {
1199  0 matchedSeqCount++;
1200  0 output.append("|");
1201    }
1202  0 else if (type.equals("pep"))
1203    {
1204  0 if (pam250.getPairwiseScore(c1, c2) > 0)
1205    {
1206  0 output.append(".");
1207    }
1208    else
1209    {
1210  0 output.append(" ");
1211    }
1212    }
1213    else
1214    {
1215  0 output.append(" ");
1216    }
1217    }
1218    } catch (IndexOutOfBoundsException e)
1219    {
1220  0 continue;
1221    }
1222    }
1223    // Now print the second aligned sequence
1224  0 output = output.append(NEWLINE);
1225  0 output = output.append(new Format("%" + (maxid) + "s").form(strName))
1226    .append(" ");
1227  0 for (int i = 0; i < len; i++)
1228    {
1229  0 if ((i + (j * len)) < strRes.length())
1230    {
1231  0 output.append(strRes.charAt(i + (j * len)));
1232    }
1233    }
1234  0 output.append(NEWLINE).append(NEWLINE);
1235    }
1236  0 float pid = (float) matchedSeqCount / seqRes.length() * 100;
1237  0 if (pid < SiftsSettings.getFailSafePIDThreshold())
1238    {
1239  0 throw new SiftsException(">>> Low PID detected for SIFTs mapping...");
1240    }
1241  0 output.append("Length of alignment = " + seqRes.length())
1242    .append(NEWLINE);
1243  0 output.append(new Format("Percentage ID = %2.2f").form(pid));
1244  0 return output;
1245    }
1246   
 
1247  0 toggle @Override
1248    public int getEntityCount()
1249    {
1250  0 return siftsEntry.getEntity().size();
1251    }
1252   
 
1253  0 toggle @Override
1254    public String getDbAccessionId()
1255    {
1256  0 return siftsEntry.getDbAccessionId();
1257    }
1258   
 
1259  0 toggle @Override
1260    public String getDbCoordSys()
1261    {
1262  0 return siftsEntry.getDbCoordSys();
1263    }
1264   
 
1265  0 toggle @Override
1266    public String getDbSource()
1267    {
1268  0 return siftsEntry.getDbSource();
1269    }
1270   
 
1271  0 toggle @Override
1272    public String getDbVersion()
1273    {
1274  0 return siftsEntry.getDbVersion();
1275    }
1276   
 
1277  0 toggle public static void setMockSiftsFile(File file)
1278    {
1279  0 mockSiftsFile = file;
1280    }
1281   
1282    }