Clover icon

Coverage Report

  1. Project Clover database Wed Nov 13 2024 18:27:33 GMT
  2. Package jalview.ws.sifts

File SiftsClient.java

 

Coverage histogram

../../../img/srcFileCovDistChart1.png
56% of files have more coverage

Code metrics

138
411
50
5
1,309
965
146
0.36
8.22
10
2.92

Classes

Class Line # Actions
SiftsClient 82 388 128
0.066308245 (6.6%)
SiftsClient.CoordinateSys 128 2 2
0.5 (50%)
SiftsClient.ResidueDetailType 145 2 2
0.0 (0%)
SiftsClient.SiftsEntitySortPojo 1033 1 1
0.0 (0%)
SiftsClient.SegmentHelperPojo 1051 18 13
0.0 (0%)
 

Contributing tests

This file is covered by 1 test.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.ws.sifts;
22   
23    import java.io.File;
24    import java.io.FileInputStream;
25    import java.io.FileOutputStream;
26    import java.io.IOException;
27    import java.io.InputStream;
28    import java.io.PrintStream;
29    import java.net.URL;
30    import java.net.URLConnection;
31    import java.nio.file.Files;
32    import java.nio.file.Path;
33    import java.nio.file.attribute.BasicFileAttributes;
34    import java.util.ArrayList;
35    import java.util.Arrays;
36    import java.util.Collection;
37    import java.util.Collections;
38    import java.util.Date;
39    import java.util.HashMap;
40    import java.util.HashSet;
41    import java.util.List;
42    import java.util.Locale;
43    import java.util.Map;
44    import java.util.Set;
45    import java.util.TreeMap;
46    import java.util.zip.GZIPInputStream;
47   
48    import javax.xml.bind.JAXBContext;
49    import javax.xml.bind.JAXBElement;
50    import javax.xml.bind.Unmarshaller;
51    import javax.xml.stream.XMLInputFactory;
52    import javax.xml.stream.XMLStreamReader;
53   
54    import jalview.analysis.AlignSeq;
55    import jalview.analysis.scoremodels.ScoreMatrix;
56    import jalview.analysis.scoremodels.ScoreModels;
57    import jalview.api.DBRefEntryI;
58    import jalview.api.SiftsClientI;
59    import jalview.bin.Console;
60    import jalview.datamodel.DBRefEntry;
61    import jalview.datamodel.DBRefSource;
62    import jalview.datamodel.SequenceI;
63    import jalview.io.BackupFiles;
64    import jalview.io.StructureFile;
65    import jalview.schemes.ResidueProperties;
66    import jalview.structure.StructureMapping;
67    import jalview.util.Comparison;
68    import jalview.util.DBRefUtils;
69    import jalview.util.Format;
70    import jalview.util.HttpUtils;
71    import jalview.util.Platform;
72    import jalview.xml.binding.sifts.Entry;
73    import jalview.xml.binding.sifts.Entry.Entity;
74    import jalview.xml.binding.sifts.Entry.Entity.Segment;
75    import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion;
76    import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue;
77    import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb;
78    import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail;
79    import mc_view.Atom;
80    import mc_view.PDBChain;
81   
 
82    public class SiftsClient implements SiftsClientI
83    {
84    /*
85    * for use in mocking out file fetch for tests only
86    * - reset to null after testing!
87    * (when non-null, getSiftsFile returns this file without touching the cache or the network)
88    */
88    private static File mockSiftsFile;
89   
// JAXB object tree for the SIFTS record; assigned in the constructor from parseSIFTs
90    private Entry siftsEntry;
91   
// structure file this client was constructed for; used to look up chains and atoms
92    private StructureFile pdb;
93   
// id of the structure file (pdb.getId()); used to locate the SIFTS record
94    private String pdbId;
95   
// label used in mapping output: pdbId, or pdbId + "|" + chain; set in getSiftsStructureMapping
96    private String structId;
97   
// coordinate system of the sequence being mapped; getGreedyMapping switches it to PDB
// when the source DBRef comes from the PDB
98    private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT;
99   
100    /**
101    * PDB sequence position to sequence coordinate mapping as derived from the SIFTS
102    * record for the identified SeqCoordSys. Used for lift-over from the sequence
103    * derived from PDB (with first extracted PDBRESNUM as 'start') to the sequence
104    * being annotated with PDB data
105    */
106    private jalview.datamodel.Mapping seqFromPdbMapping;
107   
// size in bytes of the copy buffer used by downloadSiftsFile
108    private static final int BUFFER_SIZE = 4096;
109   
// sentinel for "no value" in residue/sequence/atom positions
110    public static final int UNASSIGNED = Integer.MIN_VALUE;
111   
// indices into the int[] values of the mapping built by processSegments:
// slot 0 receives the residue number parsed from the cross-reference
112    private static final int PDB_RES_POS = 0;
113   
// slot 1 is filled by populateAtomPositions with the atom index for that residue
114    private static final int PDB_ATOM_POS = 1;
115   
// slot 2 receives the running PDB residue counter kept by processSegments
116    private static final int PDBE_POS = 2;
117   
// residue annotation value that isResidueObserved treats as "not observed"
118    private static final String NOT_OBSERVED = "Not_Observed";
119   
// base URL prepended by getDownloadUrlFor to "<two id characters>/<file name>"
120    private static final String SIFTS_SPLIT_FTP_BASE_URL = "https://ftp.ebi.ac.uk/pub/databases/msd/sifts/split_xml/";
121   
122    private final static String NEWLINE = System.lineSeparator();
123   
// accession id of the source DBRef chosen by the latest getGreedyMapping call
124    private String curSourceDBRef;
125   
// lower-cased accession ids of the sequence's DBRefs from the latest getGreedyMapping call
126    private HashSet<String> curDBRefAccessionIdsString;
127   
 
128    private enum CoordinateSys
129    {
130    UNIPROT("UniProt"), PDB("PDBresnum"), PDBe("PDBe");
131   
132    private String name;
133   
 
134  12 toggle private CoordinateSys(String name)
135    {
136  12 this.name = name;
137    }
138   
 
139  0 toggle public String getName()
140    {
141  0 return name;
142    }
143    };
144   
 
145    private enum ResidueDetailType
146    {
147    NAME_SEC_STRUCTURE("nameSecondaryStructure"),
148    CODE_SEC_STRUCTURE("codeSecondaryStructure"), ANNOTATION("Annotation");
149   
150    private String code;
151   
 
152  0 toggle private ResidueDetailType(String code)
153    {
154  0 this.code = code;
155    }
156   
 
157  0 toggle public String getCode()
158    {
159  0 return code;
160    }
161    };
162   
163    /**
164    * Fetch SIFTs file for the given PDBfile and construct an instance of
165    * SiftsClient
166    *
167    * @param pdbId
168    * @throws SiftsException
169    */
 
170  4 toggle public SiftsClient(StructureFile pdb) throws SiftsException
171    {
172  4 this.pdb = pdb;
173  4 this.pdbId = pdb.getId();
174  4 File siftsFile = getSiftsFile(pdbId);
175  4 siftsEntry = parseSIFTs(siftsFile);
176    }
177   
178    /**
179    * Parse the given SIFTs File and return a JAXB POJO of parsed data
180    *
181    * @param siftFile
182    * - the GZipped SIFTs XML file to parse
183    * @return
184    * @throws Exception
185    * if a problem occurs while parsing the SIFTs XML
186    */
 
187  4 toggle private Entry parseSIFTs(File siftFile) throws SiftsException
188    {
189  4 try (InputStream in = new FileInputStream(siftFile);
190  4 GZIPInputStream gzis = new GZIPInputStream(in);)
191    {
192    // jalview.bin.Console.outPrintln("File : " + siftFile.getAbsolutePath());
193  4 JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts");
194  4 XMLStreamReader streamReader = XMLInputFactory.newInstance()
195    .createXMLStreamReader(gzis);
196  4 Unmarshaller um = jc.createUnmarshaller();
197  4 JAXBElement<Entry> jbe = um.unmarshal(streamReader, Entry.class);
198  4 return jbe.getValue();
199    } catch (Exception e)
200    {
201  0 e.printStackTrace();
202  0 throw new SiftsException(e.getMessage());
203    }
204    }
205   
206    /**
207    * Get a SIFTs XML file for a given PDB Id from Cache or download from FTP
208    * repository if not found in cache
209    *
210    * @param pdbId
211    * @return SIFTs XML file
212    * @throws SiftsException
213    */
 
214  4 toggle public static File getSiftsFile(String pdbId) throws SiftsException
215    {
216    /*
217    * return mocked file if it has been set
218    */
219  4 if (mockSiftsFile != null)
220    {
221  0 return mockSiftsFile;
222    }
223   
224  4 String siftsFileName = SiftsSettings.getSiftDownloadDirectory()
225    + pdbId.toLowerCase(Locale.ROOT) + ".xml.gz";
226  4 File siftsFile = new File(siftsFileName);
227  4 if (siftsFile.exists())
228    {
229    // The line below is required for unit testing... don't comment it out!!!
230  4 jalview.bin.Console
231    .outPrintln(">>> SIFTS File already downloaded for " + pdbId);
232   
233  4 if (isFileOlderThanThreshold(siftsFile,
234    SiftsSettings.getCacheThresholdInDays()))
235    {
236  0 File oldSiftsFile = new File(siftsFileName + "_old");
237  0 BackupFiles.moveFileToFile(siftsFile, oldSiftsFile);
238  0 try
239    {
240  0 siftsFile = downloadSiftsFile(pdbId.toLowerCase(Locale.ROOT));
241  0 oldSiftsFile.delete();
242  0 return siftsFile;
243    } catch (IOException e)
244    {
245  0 e.printStackTrace();
246  0 BackupFiles.moveFileToFile(oldSiftsFile, siftsFile);
247  0 return new File(siftsFileName);
248    }
249    }
250    else
251    {
252  4 return siftsFile;
253    }
254    }
255  0 try
256    {
257  0 siftsFile = downloadSiftsFile(pdbId.toLowerCase(Locale.ROOT));
258    } catch (IOException e)
259    {
260  0 throw new SiftsException(e.getMessage());
261    }
262  0 return siftsFile;
263    }
264   
265    /**
266    * This method enables checking if a cached file has exceeded a certain
267    * threshold(in days)
268    *
269    * @param file
270    * the cached file
271    * @param noOfDays
272    * the threshold in days
273    * @return
274    */
 
275  4 toggle public static boolean isFileOlderThanThreshold(File file, int noOfDays)
276    {
277  4 Path filePath = file.toPath();
278  4 BasicFileAttributes attr;
279  4 int diffInDays = 0;
280  4 try
281    {
282  4 attr = Files.readAttributes(filePath, BasicFileAttributes.class);
283  4 diffInDays = (int) ((new Date().getTime()
284    - attr.lastModifiedTime().toMillis())
285    / (1000 * 60 * 60 * 24));
286    // jalview.bin.Console.outPrintln("Diff in days : " + diffInDays);
287    } catch (IOException e)
288    {
289  0 e.printStackTrace();
290    }
291  4 return noOfDays <= diffInDays;
292    }
293   
294    /**
295    * Download a SIFTs XML file for a given PDB Id from an FTP repository
296    *
297    * @param pdbId
298    * @return downloaded SIFTs XML file
299    * @throws SiftsException
300    * @throws IOException
301    */
 
302  0 toggle public static File downloadSiftsFile(String pdbId)
303    throws SiftsException, IOException
304    {
305  0 if (pdbId.contains(".cif"))
306    {
307  0 pdbId = pdbId.replace(".cif", "");
308    }
309  0 String siftFile = pdbId + ".xml.gz";
310  0 String siftsFileFTPURL = getDownloadUrlFor(siftFile);
311   
312    /*
313    * Download the file from URL to either
314    * Java: directory of cached downloaded SIFTS files
315    * Javascript: temporary 'file' (in-memory cache)
316    */
317  0 File downloadTo = null;
318  0 if (Platform.isJS())
319    {
320  0 downloadTo = File.createTempFile(siftFile, ".xml.gz");
321    }
322    else
323    {
324  0 downloadTo = new File(
325    SiftsSettings.getSiftDownloadDirectory() + siftFile);
326  0 File siftsDownloadDir = new File(
327    SiftsSettings.getSiftDownloadDirectory());
328  0 if (!siftsDownloadDir.exists())
329    {
330  0 siftsDownloadDir.mkdirs();
331    }
332    }
333   
334    // jalview.bin.Console.outPrintln(">> Download ftp url : " +
335    // siftsFileFTPURL);
336    // long now = System.currentTimeMillis();
337  0 URL url = new URL(siftsFileFTPURL);
338  0 URLConnection conn = HttpUtils.openConnection(url);
339  0 InputStream inputStream = conn.getInputStream();
340  0 FileOutputStream outputStream = new FileOutputStream(downloadTo);
341  0 byte[] buffer = new byte[BUFFER_SIZE];
342  0 int bytesRead = -1;
343  0 while ((bytesRead = inputStream.read(buffer)) != -1)
344    {
345  0 outputStream.write(buffer, 0, bytesRead);
346    }
347  0 outputStream.close();
348  0 inputStream.close();
349    // jalview.bin.Console.outPrintln(">>> File downloaded : " +
350    // downloadedSiftsFile
351    // + " took " + (System.currentTimeMillis() - now) + "ms");
352  0 return downloadTo;
353    }
354   
 
355  1 toggle public static String getDownloadUrlFor(String siftFile)
356    {
357  1 String durl = SIFTS_SPLIT_FTP_BASE_URL + siftFile.substring(1, 3) + "/"
358    + siftFile;
359  1 Console.trace("SIFTS URL for " + siftFile + " is " + durl);
360  1 return durl;
361   
362    }
363   
364    /**
365    * Delete the SIFTs file for the given PDB Id in the local SIFTs download
366    * directory
367    *
368    * @param pdbId
369    * @return true if the file was deleted or doesn't exist
370    */
 
371  0 toggle public static boolean deleteSiftsFileByPDBId(String pdbId)
372    {
373  0 File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory()
374    + pdbId.toLowerCase(Locale.ROOT) + ".xml.gz");
375  0 if (siftsFile.exists())
376    {
377  0 return siftsFile.delete();
378    }
379  0 return true;
380    }
381   
382    /**
383    * Get a valid SIFTs DBRef for the given sequence current SIFTs entry
384    *
385    * @param seq
386    * - the target sequence for the operation
387    * @return a valid DBRefEntry that is SIFTs compatible
388    * @throws Exception
389    * if no valid source DBRefEntry was found for the given sequences
390    */
 
391  0 toggle public DBRefEntryI getValidSourceDBRef(SequenceI seq)
392    throws SiftsException
393    {
394  0 List<DBRefEntry> dbRefs = seq.getPrimaryDBRefs();
395  0 if (dbRefs == null || dbRefs.size() < 1)
396    {
397  0 throw new SiftsException(
398    "Source DBRef could not be determined. DBRefs might not have been retrieved.");
399    }
400   
401  0 for (DBRefEntry dbRef : dbRefs)
402    {
403  0 if (dbRef == null || dbRef.getAccessionId() == null
404    || dbRef.getSource() == null)
405    {
406  0 continue;
407    }
408  0 String canonicalSource = DBRefUtils
409    .getCanonicalName(dbRef.getSource());
410  0 if (isValidDBRefEntry(dbRef)
411    && (canonicalSource.equalsIgnoreCase(DBRefSource.UNIPROT)
412    || canonicalSource.equalsIgnoreCase(DBRefSource.PDB)))
413    {
414  0 return dbRef;
415    }
416    }
417  0 throw new SiftsException("Could not get source DB Ref");
418    }
419   
420    /**
421    * Check that the DBRef Entry is properly populated and is available in this
422    * SiftClient instance
423    *
424    * @param entry
425    * - DBRefEntry to validate
426    * @return true validation is successful otherwise false is returned.
427    */
 
428  0 toggle boolean isValidDBRefEntry(DBRefEntryI entry)
429    {
430  0 return entry != null && entry.getAccessionId() != null
431    && isFoundInSiftsEntry(entry.getAccessionId());
432    }
433   
 
434  0 toggle @Override
435    public HashSet<String> getAllMappingAccession()
436    {
437  0 HashSet<String> accessions = new HashSet<String>();
438  0 List<Entity> entities = siftsEntry.getEntity();
439  0 for (Entity entity : entities)
440    {
441  0 List<Segment> segments = entity.getSegment();
442  0 for (Segment segment : segments)
443    {
444  0 List<MapRegion> mapRegions = segment.getListMapRegion()
445    .getMapRegion();
446  0 for (MapRegion mapRegion : mapRegions)
447    {
448  0 accessions.add(mapRegion.getDb().getDbAccessionId()
449    .toLowerCase(Locale.ROOT));
450    }
451    }
452    }
453  0 return accessions;
454    }
455   
 
456  0 toggle @Override
457    public StructureMapping getSiftsStructureMapping(SequenceI seq,
458    String pdbFile, String chain) throws SiftsException
459    {
460  0 SequenceI aseq = seq;
461  0 while (seq.getDatasetSequence() != null)
462    {
463  0 seq = seq.getDatasetSequence();
464    }
465  0 structId = (chain == null) ? pdbId : pdbId + "|" + chain;
466  0 jalview.bin.Console.outPrintln("Getting SIFTS mapping for " + structId
467    + ": seq " + seq.getName());
468   
469  0 final StringBuilder mappingDetails = new StringBuilder(128);
470  0 PrintStream ps = new PrintStream(System.out)
471    {
 
472  0 toggle @Override
473    public void print(String x)
474    {
475  0 mappingDetails.append(x);
476    }
477   
 
478  0 toggle @Override
479    public void println()
480    {
481  0 mappingDetails.append(NEWLINE);
482    }
483    };
484  0 HashMap<Integer, int[]> mapping = getGreedyMapping(chain, seq, ps);
485   
486  0 String mappingOutput = mappingDetails.toString();
487  0 StructureMapping siftsMapping = new StructureMapping(aseq, pdbFile,
488    pdbId, chain, mapping, mappingOutput, seqFromPdbMapping);
489   
490  0 return siftsMapping;
491    }
492   
 
493  0 toggle @Override
494    public HashMap<Integer, int[]> getGreedyMapping(String entityId,
495    SequenceI seq, java.io.PrintStream os) throws SiftsException
496    {
497  0 List<Integer> omitNonObserved = new ArrayList<>();
498  0 int nonObservedShiftIndex = 0, pdbeNonObserved = 0;
499    // jalview.bin.Console.outPrintln("Generating mappings for : " + entityId);
500  0 Entity entity = null;
501  0 entity = getEntityById(entityId);
502  0 String originalSeq = AlignSeq.extractGaps(
503    jalview.util.Comparison.GapChars, seq.getSequenceAsString());
504  0 HashMap<Integer, int[]> mapping = new HashMap<Integer, int[]>();
505  0 DBRefEntryI sourceDBRef;
506  0 sourceDBRef = getValidSourceDBRef(seq);
507    // TODO ensure sequence start/end is in the same coordinate system and
508    // consistent with the choosen sourceDBRef
509   
510    // set sequence coordinate system - default value is UniProt
511  0 if (sourceDBRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
512    {
513  0 seqCoordSys = CoordinateSys.PDB;
514    }
515   
516  0 HashSet<String> dbRefAccessionIdsString = new HashSet<String>();
517  0 for (DBRefEntry dbref : seq.getDBRefs())
518    {
519  0 dbRefAccessionIdsString
520    .add(dbref.getAccessionId().toLowerCase(Locale.ROOT));
521    }
522  0 dbRefAccessionIdsString
523    .add(sourceDBRef.getAccessionId().toLowerCase(Locale.ROOT));
524   
525  0 curDBRefAccessionIdsString = dbRefAccessionIdsString;
526  0 curSourceDBRef = sourceDBRef.getAccessionId();
527   
528  0 TreeMap<Integer, String> resNumMap = new TreeMap<Integer, String>();
529  0 List<Segment> segments = entity.getSegment();
530  0 SegmentHelperPojo shp = new SegmentHelperPojo(seq, mapping, resNumMap,
531    omitNonObserved, nonObservedShiftIndex, pdbeNonObserved);
532  0 processSegments(segments, shp);
533  0 try
534    {
535  0 populateAtomPositions(entityId, mapping);
536    } catch (Exception e)
537    {
538  0 e.printStackTrace();
539    }
540  0 if (seqCoordSys == CoordinateSys.UNIPROT)
541    {
542  0 padWithGaps(resNumMap, omitNonObserved);
543    }
544  0 int seqStart = UNASSIGNED;
545  0 int seqEnd = UNASSIGNED;
546  0 int pdbStart = UNASSIGNED;
547  0 int pdbEnd = UNASSIGNED;
548   
549  0 if (mapping.isEmpty())
550    {
551  0 throw new SiftsException("SIFTS mapping failed for " + entityId
552    + " and " + seq.getName());
553    }
554    // also construct a mapping object between the seq-coord sys and the PDB
555    // seq's coord sys
556   
557  0 Integer[] keys = mapping.keySet().toArray(new Integer[0]);
558  0 Arrays.sort(keys);
559  0 seqStart = keys[0];
560  0 seqEnd = keys[keys.length - 1];
561  0 List<int[]> from = new ArrayList<>(), to = new ArrayList<>();
562  0 int[] _cfrom = null, _cto = null;
563  0 String matchedSeq = originalSeq;
564  0 if (seqStart != UNASSIGNED) // fixme! seqStart can map to -1 for a pdb
565    // sequence that starts <-1
566    {
567  0 for (int seqps : keys)
568    {
569  0 int pdbpos = mapping.get(seqps)[PDBE_POS];
570  0 if (pdbpos == UNASSIGNED)
571    {
572    // not correct - pdbpos might be -1, but leave it for now
573  0 continue;
574    }
575  0 if (_cfrom == null || seqps != _cfrom[1] + 1)
576    {
577  0 _cfrom = new int[] { seqps, seqps };
578  0 from.add(_cfrom);
579  0 _cto = null; // discontinuity
580    }
581    else
582    {
583  0 _cfrom[1] = seqps;
584    }
585  0 if (_cto == null || pdbpos != 1 + _cto[1])
586    {
587  0 _cto = new int[] { pdbpos, pdbpos };
588  0 to.add(_cto);
589    }
590    else
591    {
592  0 _cto[1] = pdbpos;
593    }
594    }
595  0 _cfrom = new int[from.size() * 2];
596  0 _cto = new int[to.size() * 2];
597  0 int p = 0;
598  0 for (int[] range : from)
599    {
600  0 _cfrom[p++] = range[0];
601  0 _cfrom[p++] = range[1];
602    }
603  0 ;
604  0 p = 0;
605  0 for (int[] range : to)
606    {
607  0 _cto[p++] = range[0];
608  0 _cto[p++] = range[1];
609    }
610  0 ;
611   
612  0 seqFromPdbMapping = new jalview.datamodel.Mapping(null, _cto, _cfrom,
613    1, 1);
614  0 pdbStart = mapping.get(seqStart)[PDB_RES_POS];
615  0 pdbEnd = mapping.get(seqEnd)[PDB_RES_POS];
616  0 int orignalSeqStart = seq.getStart();
617  0 if (orignalSeqStart >= 1)
618    {
619  0 int subSeqStart = (seqStart >= orignalSeqStart)
620    ? seqStart - orignalSeqStart
621    : 0;
622  0 int subSeqEnd = seqEnd - (orignalSeqStart - 1);
623  0 subSeqEnd = originalSeq.length() < subSeqEnd ? originalSeq.length()
624    : subSeqEnd;
625  0 matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd);
626    }
627    else
628    {
629  0 matchedSeq = originalSeq.substring(1, originalSeq.length());
630    }
631    }
632   
633  0 StringBuilder targetStrucSeqs = new StringBuilder();
634  0 for (String res : resNumMap.values())
635    {
636  0 targetStrucSeqs.append(res);
637    }
638   
639  0 if (os != null)
640    {
641  0 MappingOutputPojo mop = new MappingOutputPojo();
642  0 mop.setSeqStart(seqStart);
643  0 mop.setSeqEnd(seqEnd);
644  0 mop.setSeqName(seq.getName());
645  0 mop.setSeqResidue(matchedSeq);
646   
647  0 mop.setStrStart(pdbStart);
648  0 mop.setStrEnd(pdbEnd);
649  0 mop.setStrName(structId);
650  0 mop.setStrResidue(targetStrucSeqs.toString());
651   
652  0 mop.setType("pep");
653  0 os.print(getMappingOutput(mop).toString());
654  0 os.println();
655    }
656  0 return mapping;
657    }
658   
 
659  0 toggle void processSegments(List<Segment> segments, SegmentHelperPojo shp)
660    {
661  0 SequenceI seq = shp.getSeq();
662  0 HashMap<Integer, int[]> mapping = shp.getMapping();
663  0 TreeMap<Integer, String> resNumMap = shp.getResNumMap();
664  0 List<Integer> omitNonObserved = shp.getOmitNonObserved();
665  0 int nonObservedShiftIndex = shp.getNonObservedShiftIndex();
666  0 int pdbeNonObservedCount = shp.getPdbeNonObserved();
667  0 int firstPDBResNum = UNASSIGNED;
668  0 for (Segment segment : segments)
669    {
670    // jalview.bin.Console.outPrintln("Mapping segments : " +
671    // segment.getSegId() + "\\"s
672    // + segStartEnd);
673  0 List<Residue> residues = segment.getListResidue().getResidue();
674  0 for (Residue residue : residues)
675    {
676  0 boolean isObserved = isResidueObserved(residue);
677  0 int pdbeIndex = getLeadingIntegerValue(residue.getDbResNum(),
678    UNASSIGNED);
679  0 int currSeqIndex = UNASSIGNED;
680  0 List<CrossRefDb> cRefDbs = residue.getCrossRefDb();
681  0 CrossRefDb pdbRefDb = null;
682  0 for (CrossRefDb cRefDb : cRefDbs)
683    {
684  0 if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB))
685    {
686  0 pdbRefDb = cRefDb;
687  0 if (firstPDBResNum == UNASSIGNED)
688    {
689  0 firstPDBResNum = getLeadingIntegerValue(cRefDb.getDbResNum(),
690    UNASSIGNED);
691    }
692    else
693    {
694  0 if (isObserved)
695    {
696    // after we find the first observed residue we just increment
697  0 firstPDBResNum++;
698    }
699    }
700    }
701  0 if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys.getName())
702    && isAccessionMatched(cRefDb.getDbAccessionId()))
703    {
704  0 currSeqIndex = getLeadingIntegerValue(cRefDb.getDbResNum(),
705    UNASSIGNED);
706  0 if (pdbRefDb != null)
707    {
708  0 break;// exit loop if pdb and uniprot are already found
709    }
710    }
711    }
712  0 if (!isObserved)
713    {
714  0 ++pdbeNonObservedCount;
715    }
716  0 if (seqCoordSys == seqCoordSys.PDB) // FIXME: is seqCoordSys ever PDBe
717    // ???
718    {
719    // if the sequence has a primary reference to the PDB, then we are
720    // dealing with a sequence extracted directly from the PDB. In that
721    // case, numbering is PDBe - non-observed residues
722  0 currSeqIndex = seq.getStart() - 1 + pdbeIndex;
723    }
724  0 if (!isObserved)
725    {
726  0 if (seqCoordSys != CoordinateSys.UNIPROT) // FIXME: PDB or PDBe only
727    // here
728    {
729    // mapping to PDB or PDBe so we need to bookkeep for the
730    // non-observed
731    // SEQRES positions
732  0 omitNonObserved.add(currSeqIndex);
733  0 ++nonObservedShiftIndex;
734    }
735    }
736  0 if (currSeqIndex == UNASSIGNED)
737    {
738    // change in logic - unobserved residues with no currSeqIndex
739    // corresponding are still counted in both nonObservedShiftIndex and
740    // pdbeIndex...
741  0 continue;
742    }
743    // if (currSeqIndex >= seq.getStart() && currSeqIndex <= seqlength) //
744    // true
745    // numbering
746    // is
747    // not
748    // up
749    // to
750    // seq.getEnd()
751    {
752   
753  0 int resNum = (pdbRefDb == null)
754    ? getLeadingIntegerValue(residue.getDbResNum(),
755    UNASSIGNED)
756    : getLeadingIntegerValue(pdbRefDb.getDbResNum(),
757    UNASSIGNED);
758   
759  0 if (isObserved)
760    {
761  0 char resCharCode = ResidueProperties
762    .getSingleCharacterCode(ResidueProperties
763    .getCanonicalAminoAcid(residue.getDbResName()));
764  0 resNumMap.put(currSeqIndex, String.valueOf(resCharCode));
765   
766  0 int[] mappingcols = new int[] { Integer.valueOf(resNum),
767  0 UNASSIGNED, isObserved ? firstPDBResNum : UNASSIGNED };
768   
769  0 mapping.put(currSeqIndex - nonObservedShiftIndex, mappingcols);
770    }
771    }
772    }
773    }
774    }
775   
776    /**
777    * Get the leading integer part of a string that begins with an integer.
778    *
779    * @param input
780    * - the string input to process
781    * @param failValue
782    * - value returned if unsuccessful
783    * @return
784    */
 
785  0 toggle static int getLeadingIntegerValue(String input, int failValue)
786    {
787  0 if (input == null)
788    {
789  0 return failValue;
790    }
791  0 String[] parts = input.split("(?=\\D)(?<=\\d)");
792  0 if (parts != null && parts.length > 0 && parts[0].matches("[0-9]+"))
793    {
794  0 return Integer.valueOf(parts[0]);
795    }
796  0 return failValue;
797    }
798   
799    /**
800    *
801    * @param chainId
802    * Target chain to populate mapping of its atom positions.
803    * @param mapping
804    * Two dimension array of residue index versus atom position
805    * @throws IllegalArgumentException
806    * Thrown if chainId or mapping is null
807    * @throws SiftsException
808    */
 
809  0 toggle void populateAtomPositions(String chainId, Map<Integer, int[]> mapping)
810    throws IllegalArgumentException, SiftsException
811    {
812  0 try
813    {
814  0 PDBChain chain = pdb.findChain(chainId);
815   
816  0 if (chain == null || mapping == null)
817    {
818  0 throw new IllegalArgumentException(
819    "Chain id or mapping must not be null.");
820    }
821  0 for (int[] map : mapping.values())
822    {
823  0 if (map[PDB_RES_POS] != UNASSIGNED)
824    {
825  0 map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms);
826    }
827    }
828    } catch (NullPointerException e)
829    {
830  0 throw new SiftsException(e.getMessage());
831    } catch (Exception e)
832    {
833  0 throw new SiftsException(e.getMessage());
834    }
835    }
836   
837    /**
838    *
839    * @param residueIndex
840    * The residue index used for the search
841    * @param atoms
842    * A collection of Atom to search
843    * @return atom position for the given residue index
844    */
 
845  0 toggle int getAtomIndex(int residueIndex, Collection<Atom> atoms)
846    {
847  0 if (atoms == null)
848    {
849  0 throw new IllegalArgumentException(
850    "atoms collection must not be null!");
851    }
852  0 for (Atom atom : atoms)
853    {
854  0 if (atom.resNumber == residueIndex)
855    {
856  0 return atom.atomIndex;
857    }
858    }
859  0 return UNASSIGNED;
860    }
861   
862    /**
863    * Checks if the residue instance is marked 'Not_observed' or not
864    *
865    * @param residue
866    * @return
867    */
 
868  0 toggle private boolean isResidueObserved(Residue residue)
869    {
870  0 Set<String> annotations = getResidueAnnotaitons(residue,
871    ResidueDetailType.ANNOTATION);
872  0 if (annotations == null || annotations.isEmpty())
873    {
874  0 return true;
875    }
876  0 for (String annotation : annotations)
877    {
878  0 if (annotation.equalsIgnoreCase(NOT_OBSERVED))
879    {
880  0 return false;
881    }
882    }
883  0 return true;
884    }
885   
886    /**
887    * Get annotation String for a given residue and annotation type
888    *
889    * @param residue
890    * @param type
891    * @return
892    */
 
893  0 toggle private Set<String> getResidueAnnotaitons(Residue residue,
894    ResidueDetailType type)
895    {
896  0 HashSet<String> foundAnnotations = new HashSet<String>();
897  0 List<ResidueDetail> resDetails = residue.getResidueDetail();
898  0 for (ResidueDetail resDetail : resDetails)
899    {
900  0 if (resDetail.getProperty().equalsIgnoreCase(type.getCode()))
901    {
902  0 foundAnnotations.add(resDetail.getContent());
903    }
904    }
905  0 return foundAnnotations;
906    }
907   
 
908  0 toggle @Override
909    public boolean isAccessionMatched(String accession)
910    {
911  0 boolean isStrictMatch = true;
912  0 return isStrictMatch ? curSourceDBRef.equalsIgnoreCase(accession)
913    : curDBRefAccessionIdsString
914    .contains(accession.toLowerCase(Locale.ROOT));
915    }
916   
 
917  0 toggle private boolean isFoundInSiftsEntry(String accessionId)
918    {
919  0 Set<String> siftsDBRefs = getAllMappingAccession();
920  0 return accessionId != null
921    && siftsDBRefs.contains(accessionId.toLowerCase(Locale.ROOT));
922    }
923   
924    /**
925    * Pad omitted residue positions in PDB sequence with gaps
926    *
927    * @param resNumMap
928    */
 
929  0 toggle void padWithGaps(Map<Integer, String> resNumMap,
930    List<Integer> omitNonObserved)
931    {
932  0 if (resNumMap == null || resNumMap.isEmpty())
933    {
934  0 return;
935    }
936  0 Integer[] keys = resNumMap.keySet().toArray(new Integer[0]);
937    // Arrays.sort(keys);
938  0 int firstIndex = keys[0];
939  0 int lastIndex = keys[keys.length - 1];
940    // jalview.bin.Console.outPrintln("Min value " + firstIndex);
941    // jalview.bin.Console.outPrintln("Max value " + lastIndex);
942  0 for (int x = firstIndex; x <= lastIndex; x++)
943    {
944  0 if (!resNumMap.containsKey(x) && !omitNonObserved.contains(x))
945    {
946  0 resNumMap.put(x, "-");
947    }
948    }
949    }
950   
 
951  0 toggle @Override
952    public Entity getEntityById(String id) throws SiftsException
953    {
954    // Determines an entity to process by performing a heuristic matching of all
955    // Entities with the given chainId and choosing the best matching Entity
956  0 Entity entity = getEntityByMostOptimalMatchedId(id);
957  0 if (entity != null)
958    {
959  0 return entity;
960    }
961  0 throw new SiftsException("Entity " + id + " not found");
962    }
963   
964    /**
965    * This method was added because EntityId is NOT always equal to ChainId.
966    * Hence, it provides the logic to greedily detect the "true" Entity for a
967    * given chainId where discrepancies exist.
968    *
969    * @param chainId
970    * @return
971    */
 
972  0 toggle public Entity getEntityByMostOptimalMatchedId(String chainId)
973    {
974    // jalview.bin.Console.outPrintln("---> advanced greedy entityId matching
975    // block
976    // entered..");
977  0 List<Entity> entities = siftsEntry.getEntity();
978  0 SiftsEntitySortPojo[] sPojo = new SiftsEntitySortPojo[entities.size()];
979  0 int count = 0;
980  0 for (Entity entity : entities)
981    {
982  0 sPojo[count] = new SiftsEntitySortPojo();
983  0 sPojo[count].entityId = entity.getEntityId();
984   
985  0 List<Segment> segments = entity.getSegment();
986  0 for (Segment segment : segments)
987    {
988  0 List<Residue> residues = segment.getListResidue().getResidue();
989  0 for (Residue residue : residues)
990    {
991  0 List<CrossRefDb> cRefDbs = residue.getCrossRefDb();
992  0 for (CrossRefDb cRefDb : cRefDbs)
993    {
994  0 if (!cRefDb.getDbSource().equalsIgnoreCase("PDB"))
995    {
996  0 continue;
997    }
998  0 ++sPojo[count].resCount;
999  0 if (cRefDb.getDbChainId().equalsIgnoreCase(chainId))
1000    {
1001  0 ++sPojo[count].chainIdFreq;
1002    }
1003    }
1004    }
1005    }
1006  0 sPojo[count].pid = (100 * sPojo[count].chainIdFreq)
1007    / sPojo[count].resCount;
1008  0 ++count;
1009    }
1010  0 Arrays.sort(sPojo, Collections.reverseOrder());
1011    // jalview.bin.Console.outPrintln("highest matched entity : " +
1012    // sPojo[0].entityId);
1013    // jalview.bin.Console.outPrintln("highest matched pid : " + sPojo[0].pid);
1014   
1015  0 if (sPojo[0].entityId != null)
1016    {
1017  0 if (sPojo[0].pid < 1)
1018    {
1019  0 return null;
1020    }
1021  0 for (Entity entity : entities)
1022    {
1023  0 if (!entity.getEntityId().equalsIgnoreCase(sPojo[0].entityId))
1024    {
1025  0 continue;
1026    }
1027  0 return entity;
1028    }
1029    }
1030  0 return null;
1031    }
1032   
 
1033    private class SiftsEntitySortPojo
1034    implements Comparable<SiftsEntitySortPojo>
1035    {
1036    public String entityId;
1037   
1038    public int chainIdFreq;
1039   
1040    public int pid;
1041   
1042    public int resCount;
1043   
 
1044  0 toggle @Override
1045    public int compareTo(SiftsEntitySortPojo o)
1046    {
1047  0 return this.pid - o.pid;
1048    }
1049    }
1050   
 
1051    private class SegmentHelperPojo
1052    {
1053    private SequenceI seq;
1054   
1055    private HashMap<Integer, int[]> mapping;
1056   
1057    private TreeMap<Integer, String> resNumMap;
1058   
1059    private List<Integer> omitNonObserved;
1060   
1061    private int nonObservedShiftIndex;
1062   
1063    /**
1064    * count of number of 'not observed' positions in the PDB record's SEQRES
1065    * (total number of residues with coordinates == length(SEQRES) -
1066    * pdbeNonObserved
1067    */
1068    private int pdbeNonObserved;
1069   
 
1070  0 toggle public SegmentHelperPojo(SequenceI seq, HashMap<Integer, int[]> mapping,
1071    TreeMap<Integer, String> resNumMap,
1072    List<Integer> omitNonObserved, int nonObservedShiftIndex,
1073    int pdbeNonObserved)
1074    {
1075  0 setSeq(seq);
1076  0 setMapping(mapping);
1077  0 setResNumMap(resNumMap);
1078  0 setOmitNonObserved(omitNonObserved);
1079  0 setNonObservedShiftIndex(nonObservedShiftIndex);
1080  0 setPdbeNonObserved(pdbeNonObserved);
1081   
1082    }
1083   
 
1084  0 toggle public void setPdbeNonObserved(int pdbeNonObserved2)
1085    {
1086  0 this.pdbeNonObserved = pdbeNonObserved2;
1087    }
1088   
 
1089  0 toggle public int getPdbeNonObserved()
1090    {
1091  0 return pdbeNonObserved;
1092    }
1093   
 
1094  0 toggle public SequenceI getSeq()
1095    {
1096  0 return seq;
1097    }
1098   
 
1099  0 toggle public void setSeq(SequenceI seq)
1100    {
1101  0 this.seq = seq;
1102    }
1103   
 
1104  0 toggle public HashMap<Integer, int[]> getMapping()
1105    {
1106  0 return mapping;
1107    }
1108   
 
1109  0 toggle public void setMapping(HashMap<Integer, int[]> mapping)
1110    {
1111  0 this.mapping = mapping;
1112    }
1113   
 
1114  0 toggle public TreeMap<Integer, String> getResNumMap()
1115    {
1116  0 return resNumMap;
1117    }
1118   
 
1119  0 toggle public void setResNumMap(TreeMap<Integer, String> resNumMap)
1120    {
1121  0 this.resNumMap = resNumMap;
1122    }
1123   
 
1124  0 toggle public List<Integer> getOmitNonObserved()
1125    {
1126  0 return omitNonObserved;
1127    }
1128   
 
1129  0 toggle public void setOmitNonObserved(List<Integer> omitNonObserved)
1130    {
1131  0 this.omitNonObserved = omitNonObserved;
1132    }
1133   
 
1134  0 toggle public int getNonObservedShiftIndex()
1135    {
1136  0 return nonObservedShiftIndex;
1137    }
1138   
 
1139  0 toggle public void setNonObservedShiftIndex(int nonObservedShiftIndex)
1140    {
1141  0 this.nonObservedShiftIndex = nonObservedShiftIndex;
1142    }
1143   
1144    }
1145   
 
1146  0 toggle @Override
1147    public StringBuilder getMappingOutput(MappingOutputPojo mp)
1148    throws SiftsException
1149    {
1150  0 String seqRes = mp.getSeqResidue();
1151  0 String seqName = mp.getSeqName();
1152  0 int sStart = mp.getSeqStart();
1153  0 int sEnd = mp.getSeqEnd();
1154   
1155  0 String strRes = mp.getStrResidue();
1156  0 String strName = mp.getStrName();
1157  0 int pdbStart = mp.getStrStart();
1158  0 int pdbEnd = mp.getStrEnd();
1159   
1160  0 String type = mp.getType();
1161   
1162  0 int maxid = (seqName.length() >= strName.length()) ? seqName.length()
1163    : strName.length();
1164  0 int len = 72 - maxid - 1;
1165   
1166  0 int nochunks = ((seqRes.length()) / len)
1167  0 + ((seqRes.length()) % len > 0 ? 1 : 0);
1168    // output mappings
1169  0 StringBuilder output = new StringBuilder(512);
1170  0 output.append(NEWLINE);
1171  0 output.append("Sequence \u27f7 Structure mapping details")
1172    .append(NEWLINE);
1173  0 output.append("Method: SIFTS");
1174  0 output.append(NEWLINE).append(NEWLINE);
1175   
1176  0 output.append(new Format("%" + maxid + "s").form(seqName));
1177  0 output.append(" : ");
1178  0 output.append(String.valueOf(sStart));
1179  0 output.append(" - ");
1180  0 output.append(String.valueOf(sEnd));
1181  0 output.append(" Maps to ");
1182  0 output.append(NEWLINE);
1183  0 output.append(new Format("%" + maxid + "s").form(structId));
1184  0 output.append(" : ");
1185  0 output.append(String.valueOf(pdbStart));
1186  0 output.append(" - ");
1187  0 output.append(String.valueOf(pdbEnd));
1188  0 output.append(NEWLINE).append(NEWLINE);
1189   
1190  0 ScoreMatrix pam250 = ScoreModels.getInstance().getPam250();
1191  0 int matchedSeqCount = 0;
1192  0 for (int j = 0; j < nochunks; j++)
1193    {
1194    // Print the first aligned sequence
1195  0 output.append(new Format("%" + (maxid) + "s").form(seqName))
1196    .append(" ");
1197   
1198  0 for (int i = 0; i < len; i++)
1199    {
1200  0 if ((i + (j * len)) < seqRes.length())
1201    {
1202  0 output.append(seqRes.charAt(i + (j * len)));
1203    }
1204    }
1205   
1206  0 output.append(NEWLINE);
1207  0 output.append(new Format("%" + (maxid) + "s").form(" ")).append(" ");
1208   
1209    /*
1210    * Print out the match symbols:
1211    * | for exact match (ignoring case)
1212    * . if PAM250 score is positive
1213    * else a space
1214    */
1215  0 for (int i = 0; i < len; i++)
1216    {
1217  0 try
1218    {
1219  0 if ((i + (j * len)) < seqRes.length())
1220    {
1221  0 char c1 = seqRes.charAt(i + (j * len));
1222  0 char c2 = strRes.charAt(i + (j * len));
1223  0 boolean sameChar = Comparison.isSameResidue(c1, c2, false);
1224  0 if (sameChar && !Comparison.isGap(c1))
1225    {
1226  0 matchedSeqCount++;
1227  0 output.append("|");
1228    }
1229  0 else if (type.equals("pep"))
1230    {
1231  0 if (pam250.getPairwiseScore(c1, c2) > 0)
1232    {
1233  0 output.append(".");
1234    }
1235    else
1236    {
1237  0 output.append(" ");
1238    }
1239    }
1240    else
1241    {
1242  0 output.append(" ");
1243    }
1244    }
1245    } catch (IndexOutOfBoundsException e)
1246    {
1247  0 continue;
1248    }
1249    }
1250    // Now print the second aligned sequence
1251  0 output = output.append(NEWLINE);
1252  0 output = output.append(new Format("%" + (maxid) + "s").form(strName))
1253    .append(" ");
1254  0 for (int i = 0; i < len; i++)
1255    {
1256  0 if ((i + (j * len)) < strRes.length())
1257    {
1258  0 output.append(strRes.charAt(i + (j * len)));
1259    }
1260    }
1261  0 output.append(NEWLINE).append(NEWLINE);
1262    }
1263  0 float pid = (float) matchedSeqCount / seqRes.length() * 100;
1264  0 if (pid < SiftsSettings.getFailSafePIDThreshold())
1265    {
1266  0 throw new SiftsException(">>> Low PID detected for SIFTs mapping...");
1267    }
1268  0 output.append("Length of alignment = " + seqRes.length())
1269    .append(NEWLINE);
1270  0 output.append(new Format("Percentage ID = %2.2f").form(pid));
1271  0 return output;
1272    }
1273   
 
1274  0 toggle @Override
1275    public int getEntityCount()
1276    {
1277  0 return siftsEntry.getEntity().size();
1278    }
1279   
 
1280  0 toggle @Override
1281    public String getDbAccessionId()
1282    {
1283  0 return siftsEntry.getDbAccessionId();
1284    }
1285   
 
1286  0 toggle @Override
1287    public String getDbCoordSys()
1288    {
1289  0 return siftsEntry.getDbCoordSys();
1290    }
1291   
 
1292  0 toggle @Override
1293    public String getDbSource()
1294    {
1295  0 return siftsEntry.getDbSource();
1296    }
1297   
 
1298  0 toggle @Override
1299    public String getDbVersion()
1300    {
1301  0 return siftsEntry.getDbVersion();
1302    }
1303   
 
1304  0 toggle public static void setMockSiftsFile(File file)
1305    {
1306  0 mockSiftsFile = file;
1307    }
1308   
1309    }