Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
SiftsClient | 82 | 388 | 128 | ||
SiftsClient.CoordinateSys | 128 | 2 | 2 | ||
SiftsClient.ResidueDetailType | 145 | 2 | 2 | ||
SiftsClient.SiftsEntitySortPojo | 1033 | 1 | 1 | ||
SiftsClient.SegmentHelperPojo | 1051 | 18 | 13 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.ws.sifts; | |
22 | ||
23 | import java.io.File; | |
24 | import java.io.FileInputStream; | |
25 | import java.io.FileOutputStream; | |
26 | import java.io.IOException; | |
27 | import java.io.InputStream; | |
28 | import java.io.PrintStream; | |
29 | import java.net.URL; | |
30 | import java.net.URLConnection; | |
31 | import java.nio.file.Files; | |
32 | import java.nio.file.Path; | |
33 | import java.nio.file.attribute.BasicFileAttributes; | |
34 | import java.util.ArrayList; | |
35 | import java.util.Arrays; | |
36 | import java.util.Collection; | |
37 | import java.util.Collections; | |
38 | import java.util.Date; | |
39 | import java.util.HashMap; | |
40 | import java.util.HashSet; | |
41 | import java.util.List; | |
42 | import java.util.Locale; | |
43 | import java.util.Map; | |
44 | import java.util.Set; | |
45 | import java.util.TreeMap; | |
46 | import java.util.zip.GZIPInputStream; | |
47 | ||
48 | import javax.xml.bind.JAXBContext; | |
49 | import javax.xml.bind.JAXBElement; | |
50 | import javax.xml.bind.Unmarshaller; | |
51 | import javax.xml.stream.XMLInputFactory; | |
52 | import javax.xml.stream.XMLStreamReader; | |
53 | ||
54 | import jalview.analysis.AlignSeq; | |
55 | import jalview.analysis.scoremodels.ScoreMatrix; | |
56 | import jalview.analysis.scoremodels.ScoreModels; | |
57 | import jalview.api.DBRefEntryI; | |
58 | import jalview.api.SiftsClientI; | |
59 | import jalview.bin.Console; | |
60 | import jalview.datamodel.DBRefEntry; | |
61 | import jalview.datamodel.DBRefSource; | |
62 | import jalview.datamodel.SequenceI; | |
63 | import jalview.io.BackupFiles; | |
64 | import jalview.io.StructureFile; | |
65 | import jalview.schemes.ResidueProperties; | |
66 | import jalview.structure.StructureMapping; | |
67 | import jalview.util.Comparison; | |
68 | import jalview.util.DBRefUtils; | |
69 | import jalview.util.Format; | |
70 | import jalview.util.HttpUtils; | |
71 | import jalview.util.Platform; | |
72 | import jalview.xml.binding.sifts.Entry; | |
73 | import jalview.xml.binding.sifts.Entry.Entity; | |
74 | import jalview.xml.binding.sifts.Entry.Entity.Segment; | |
75 | import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; | |
76 | import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; | |
77 | import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; | |
78 | import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail; | |
79 | import mc_view.Atom; | |
80 | import mc_view.PDBChain; | |
81 | ||
82 | public class SiftsClient implements SiftsClientI | |
83 | { | |
84 | /* | |
85 | * for use in mocking out file fetch for tests only | |
86 | * - reset to null after testing! | |
87 | */ | |
88 | private static File mockSiftsFile; | |
89 | ||
90 | private Entry siftsEntry; | |
91 | ||
92 | private StructureFile pdb; | |
93 | ||
94 | private String pdbId; | |
95 | ||
96 | private String structId; | |
97 | ||
98 | private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT; | |
99 | ||
100 | /** | |
101 | * PDB sequence position to sequence coordinate mapping as derived from SIFTS | |
102 | * record for the identified SeqCoordSys Used for lift-over from sequence | |
103 | * derived from PDB (with first extracted PDBRESNUM as 'start' to the sequence | |
104 | * being annotated with PDB data | |
105 | */ | |
106 | private jalview.datamodel.Mapping seqFromPdbMapping; | |
107 | ||
108 | private static final int BUFFER_SIZE = 4096; | |
109 | ||
110 | public static final int UNASSIGNED = Integer.MIN_VALUE; | |
111 | ||
112 | private static final int PDB_RES_POS = 0; | |
113 | ||
114 | private static final int PDB_ATOM_POS = 1; | |
115 | ||
116 | private static final int PDBE_POS = 2; | |
117 | ||
118 | private static final String NOT_OBSERVED = "Not_Observed"; | |
119 | ||
120 | private static final String SIFTS_SPLIT_FTP_BASE_URL = "https://ftp.ebi.ac.uk/pub/databases/msd/sifts/split_xml/"; | |
121 | ||
122 | private final static String NEWLINE = System.lineSeparator(); | |
123 | ||
124 | private String curSourceDBRef; | |
125 | ||
126 | private HashSet<String> curDBRefAccessionIdsString; | |
127 | ||
128 | private enum CoordinateSys | |
129 | { | |
130 | UNIPROT("UniProt"), PDB("PDBresnum"), PDBe("PDBe"); | |
131 | ||
132 | private String name; | |
133 | ||
134 | 12 | private CoordinateSys(String name) |
135 | { | |
136 | 12 | this.name = name; |
137 | } | |
138 | ||
139 | 0 | public String getName() |
140 | { | |
141 | 0 | return name; |
142 | } | |
143 | }; | |
144 | ||
145 | private enum ResidueDetailType | |
146 | { | |
147 | NAME_SEC_STRUCTURE("nameSecondaryStructure"), | |
148 | CODE_SEC_STRUCTURE("codeSecondaryStructure"), ANNOTATION("Annotation"); | |
149 | ||
150 | private String code; | |
151 | ||
152 | 0 | private ResidueDetailType(String code) |
153 | { | |
154 | 0 | this.code = code; |
155 | } | |
156 | ||
157 | 0 | public String getCode() |
158 | { | |
159 | 0 | return code; |
160 | } | |
161 | }; | |
162 | ||
/**
 * Creates a client for the given structure file: locates (or downloads) the
 * SIFTS file matching the structure's id and parses it.
 * 
 * @param pdb
 *          the structure file whose id selects the SIFTS record
 * @throws SiftsException
 *           if the SIFTS file cannot be obtained or parsed
 */
public SiftsClient(StructureFile pdb) throws SiftsException
{
  this.pdb = pdb;
  pdbId = pdb.getId();
  siftsEntry = parseSIFTs(getSiftsFile(pdbId));
}
177 | ||
178 | /** | |
179 | * Parse the given SIFTs File and return a JAXB POJO of parsed data | |
180 | * | |
181 | * @param siftFile | |
182 | * - the GZipped SIFTs XML file to parse | |
183 | * @return | |
184 | * @throws Exception | |
185 | * if a problem occurs while parsing the SIFTs XML | |
186 | */ | |
187 | 4 | private Entry parseSIFTs(File siftFile) throws SiftsException |
188 | { | |
189 | 4 | try (InputStream in = new FileInputStream(siftFile); |
190 | 4 | GZIPInputStream gzis = new GZIPInputStream(in);) |
191 | { | |
192 | // jalview.bin.Console.outPrintln("File : " + siftFile.getAbsolutePath()); | |
193 | 4 | JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts"); |
194 | 4 | XMLStreamReader streamReader = XMLInputFactory.newInstance() |
195 | .createXMLStreamReader(gzis); | |
196 | 4 | Unmarshaller um = jc.createUnmarshaller(); |
197 | 4 | JAXBElement<Entry> jbe = um.unmarshal(streamReader, Entry.class); |
198 | 4 | return jbe.getValue(); |
199 | } catch (Exception e) | |
200 | { | |
201 | 0 | e.printStackTrace(); |
202 | 0 | throw new SiftsException(e.getMessage()); |
203 | } | |
204 | } | |
205 | ||
206 | /** | |
207 | * Get a SIFTs XML file for a given PDB Id from Cache or download from FTP | |
208 | * repository if not found in cache | |
209 | * | |
210 | * @param pdbId | |
211 | * @return SIFTs XML file | |
212 | * @throws SiftsException | |
213 | */ | |
214 | 4 | public static File getSiftsFile(String pdbId) throws SiftsException |
215 | { | |
216 | /* | |
217 | * return mocked file if it has been set | |
218 | */ | |
219 | 4 | if (mockSiftsFile != null) |
220 | { | |
221 | 0 | return mockSiftsFile; |
222 | } | |
223 | ||
224 | 4 | String siftsFileName = SiftsSettings.getSiftDownloadDirectory() |
225 | + pdbId.toLowerCase(Locale.ROOT) + ".xml.gz"; | |
226 | 4 | File siftsFile = new File(siftsFileName); |
227 | 4 | if (siftsFile.exists()) |
228 | { | |
229 | // The line below is required for unit testing... don't comment it out!!! | |
230 | 4 | jalview.bin.Console |
231 | .outPrintln(">>> SIFTS File already downloaded for " + pdbId); | |
232 | ||
233 | 4 | if (isFileOlderThanThreshold(siftsFile, |
234 | SiftsSettings.getCacheThresholdInDays())) | |
235 | { | |
236 | 0 | File oldSiftsFile = new File(siftsFileName + "_old"); |
237 | 0 | BackupFiles.moveFileToFile(siftsFile, oldSiftsFile); |
238 | 0 | try |
239 | { | |
240 | 0 | siftsFile = downloadSiftsFile(pdbId.toLowerCase(Locale.ROOT)); |
241 | 0 | oldSiftsFile.delete(); |
242 | 0 | return siftsFile; |
243 | } catch (IOException e) | |
244 | { | |
245 | 0 | e.printStackTrace(); |
246 | 0 | BackupFiles.moveFileToFile(oldSiftsFile, siftsFile); |
247 | 0 | return new File(siftsFileName); |
248 | } | |
249 | } | |
250 | else | |
251 | { | |
252 | 4 | return siftsFile; |
253 | } | |
254 | } | |
255 | 0 | try |
256 | { | |
257 | 0 | siftsFile = downloadSiftsFile(pdbId.toLowerCase(Locale.ROOT)); |
258 | } catch (IOException e) | |
259 | { | |
260 | 0 | throw new SiftsException(e.getMessage()); |
261 | } | |
262 | 0 | return siftsFile; |
263 | } | |
264 | ||
265 | /** | |
266 | * This method enables checking if a cached file has exceeded a certain | |
267 | * threshold(in days) | |
268 | * | |
269 | * @param file | |
270 | * the cached file | |
271 | * @param noOfDays | |
272 | * the threshold in days | |
273 | * @return | |
274 | */ | |
275 | 4 | public static boolean isFileOlderThanThreshold(File file, int noOfDays) |
276 | { | |
277 | 4 | Path filePath = file.toPath(); |
278 | 4 | BasicFileAttributes attr; |
279 | 4 | int diffInDays = 0; |
280 | 4 | try |
281 | { | |
282 | 4 | attr = Files.readAttributes(filePath, BasicFileAttributes.class); |
283 | 4 | diffInDays = (int) ((new Date().getTime() |
284 | - attr.lastModifiedTime().toMillis()) | |
285 | / (1000 * 60 * 60 * 24)); | |
286 | // jalview.bin.Console.outPrintln("Diff in days : " + diffInDays); | |
287 | } catch (IOException e) | |
288 | { | |
289 | 0 | e.printStackTrace(); |
290 | } | |
291 | 4 | return noOfDays <= diffInDays; |
292 | } | |
293 | ||
294 | /** | |
295 | * Download a SIFTs XML file for a given PDB Id from an FTP repository | |
296 | * | |
297 | * @param pdbId | |
298 | * @return downloaded SIFTs XML file | |
299 | * @throws SiftsException | |
300 | * @throws IOException | |
301 | */ | |
302 | 0 | public static File downloadSiftsFile(String pdbId) |
303 | throws SiftsException, IOException | |
304 | { | |
305 | 0 | if (pdbId.contains(".cif")) |
306 | { | |
307 | 0 | pdbId = pdbId.replace(".cif", ""); |
308 | } | |
309 | 0 | String siftFile = pdbId + ".xml.gz"; |
310 | 0 | String siftsFileFTPURL = getDownloadUrlFor(siftFile); |
311 | ||
312 | /* | |
313 | * Download the file from URL to either | |
314 | * Java: directory of cached downloaded SIFTS files | |
315 | * Javascript: temporary 'file' (in-memory cache) | |
316 | */ | |
317 | 0 | File downloadTo = null; |
318 | 0 | if (Platform.isJS()) |
319 | { | |
320 | 0 | downloadTo = File.createTempFile(siftFile, ".xml.gz"); |
321 | } | |
322 | else | |
323 | { | |
324 | 0 | downloadTo = new File( |
325 | SiftsSettings.getSiftDownloadDirectory() + siftFile); | |
326 | 0 | File siftsDownloadDir = new File( |
327 | SiftsSettings.getSiftDownloadDirectory()); | |
328 | 0 | if (!siftsDownloadDir.exists()) |
329 | { | |
330 | 0 | siftsDownloadDir.mkdirs(); |
331 | } | |
332 | } | |
333 | ||
334 | // jalview.bin.Console.outPrintln(">> Download ftp url : " + | |
335 | // siftsFileFTPURL); | |
336 | // long now = System.currentTimeMillis(); | |
337 | 0 | URL url = new URL(siftsFileFTPURL); |
338 | 0 | URLConnection conn = HttpUtils.openConnection(url); |
339 | 0 | InputStream inputStream = conn.getInputStream(); |
340 | 0 | FileOutputStream outputStream = new FileOutputStream(downloadTo); |
341 | 0 | byte[] buffer = new byte[BUFFER_SIZE]; |
342 | 0 | int bytesRead = -1; |
343 | 0 | while ((bytesRead = inputStream.read(buffer)) != -1) |
344 | { | |
345 | 0 | outputStream.write(buffer, 0, bytesRead); |
346 | } | |
347 | 0 | outputStream.close(); |
348 | 0 | inputStream.close(); |
349 | // jalview.bin.Console.outPrintln(">>> File downloaded : " + | |
350 | // downloadedSiftsFile | |
351 | // + " took " + (System.currentTimeMillis() - now) + "ms"); | |
352 | 0 | return downloadTo; |
353 | } | |
354 | ||
355 | 1 | public static String getDownloadUrlFor(String siftFile) |
356 | { | |
357 | 1 | String durl = SIFTS_SPLIT_FTP_BASE_URL + siftFile.substring(1, 3) + "/" |
358 | + siftFile; | |
359 | 1 | Console.trace("SIFTS URL for " + siftFile + " is " + durl); |
360 | 1 | return durl; |
361 | ||
362 | } | |
363 | ||
364 | /** | |
365 | * Delete the SIFTs file for the given PDB Id in the local SIFTs download | |
366 | * directory | |
367 | * | |
368 | * @param pdbId | |
369 | * @return true if the file was deleted or doesn't exist | |
370 | */ | |
371 | 0 | public static boolean deleteSiftsFileByPDBId(String pdbId) |
372 | { | |
373 | 0 | File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory() |
374 | + pdbId.toLowerCase(Locale.ROOT) + ".xml.gz"); | |
375 | 0 | if (siftsFile.exists()) |
376 | { | |
377 | 0 | return siftsFile.delete(); |
378 | } | |
379 | 0 | return true; |
380 | } | |
381 | ||
382 | /** | |
383 | * Get a valid SIFTs DBRef for the given sequence current SIFTs entry | |
384 | * | |
385 | * @param seq | |
386 | * - the target sequence for the operation | |
387 | * @return a valid DBRefEntry that is SIFTs compatible | |
388 | * @throws Exception | |
389 | * if no valid source DBRefEntry was found for the given sequences | |
390 | */ | |
391 | 0 | public DBRefEntryI getValidSourceDBRef(SequenceI seq) |
392 | throws SiftsException | |
393 | { | |
394 | 0 | List<DBRefEntry> dbRefs = seq.getPrimaryDBRefs(); |
395 | 0 | if (dbRefs == null || dbRefs.size() < 1) |
396 | { | |
397 | 0 | throw new SiftsException( |
398 | "Source DBRef could not be determined. DBRefs might not have been retrieved."); | |
399 | } | |
400 | ||
401 | 0 | for (DBRefEntry dbRef : dbRefs) |
402 | { | |
403 | 0 | if (dbRef == null || dbRef.getAccessionId() == null |
404 | || dbRef.getSource() == null) | |
405 | { | |
406 | 0 | continue; |
407 | } | |
408 | 0 | String canonicalSource = DBRefUtils |
409 | .getCanonicalName(dbRef.getSource()); | |
410 | 0 | if (isValidDBRefEntry(dbRef) |
411 | && (canonicalSource.equalsIgnoreCase(DBRefSource.UNIPROT) | |
412 | || canonicalSource.equalsIgnoreCase(DBRefSource.PDB))) | |
413 | { | |
414 | 0 | return dbRef; |
415 | } | |
416 | } | |
417 | 0 | throw new SiftsException("Could not get source DB Ref"); |
418 | } | |
419 | ||
420 | /** | |
421 | * Check that the DBRef Entry is properly populated and is available in this | |
422 | * SiftClient instance | |
423 | * | |
424 | * @param entry | |
425 | * - DBRefEntry to validate | |
426 | * @return true validation is successful otherwise false is returned. | |
427 | */ | |
428 | 0 | boolean isValidDBRefEntry(DBRefEntryI entry) |
429 | { | |
430 | 0 | return entry != null && entry.getAccessionId() != null |
431 | && isFoundInSiftsEntry(entry.getAccessionId()); | |
432 | } | |
433 | ||
434 | 0 | @Override |
435 | public HashSet<String> getAllMappingAccession() | |
436 | { | |
437 | 0 | HashSet<String> accessions = new HashSet<String>(); |
438 | 0 | List<Entity> entities = siftsEntry.getEntity(); |
439 | 0 | for (Entity entity : entities) |
440 | { | |
441 | 0 | List<Segment> segments = entity.getSegment(); |
442 | 0 | for (Segment segment : segments) |
443 | { | |
444 | 0 | List<MapRegion> mapRegions = segment.getListMapRegion() |
445 | .getMapRegion(); | |
446 | 0 | for (MapRegion mapRegion : mapRegions) |
447 | { | |
448 | 0 | accessions.add(mapRegion.getDb().getDbAccessionId() |
449 | .toLowerCase(Locale.ROOT)); | |
450 | } | |
451 | } | |
452 | } | |
453 | 0 | return accessions; |
454 | } | |
455 | ||
456 | 0 | @Override |
457 | public StructureMapping getSiftsStructureMapping(SequenceI seq, | |
458 | String pdbFile, String chain) throws SiftsException | |
459 | { | |
460 | 0 | SequenceI aseq = seq; |
461 | 0 | while (seq.getDatasetSequence() != null) |
462 | { | |
463 | 0 | seq = seq.getDatasetSequence(); |
464 | } | |
465 | 0 | structId = (chain == null) ? pdbId : pdbId + "|" + chain; |
466 | 0 | jalview.bin.Console.outPrintln("Getting SIFTS mapping for " + structId |
467 | + ": seq " + seq.getName()); | |
468 | ||
469 | 0 | final StringBuilder mappingDetails = new StringBuilder(128); |
470 | 0 | PrintStream ps = new PrintStream(System.out) |
471 | { | |
472 | 0 | @Override |
473 | public void print(String x) | |
474 | { | |
475 | 0 | mappingDetails.append(x); |
476 | } | |
477 | ||
478 | 0 | @Override |
479 | public void println() | |
480 | { | |
481 | 0 | mappingDetails.append(NEWLINE); |
482 | } | |
483 | }; | |
484 | 0 | HashMap<Integer, int[]> mapping = getGreedyMapping(chain, seq, ps); |
485 | ||
486 | 0 | String mappingOutput = mappingDetails.toString(); |
487 | 0 | StructureMapping siftsMapping = new StructureMapping(aseq, pdbFile, |
488 | pdbId, chain, mapping, mappingOutput, seqFromPdbMapping); | |
489 | ||
490 | 0 | return siftsMapping; |
491 | } | |
492 | ||
493 | 0 | @Override |
494 | public HashMap<Integer, int[]> getGreedyMapping(String entityId, | |
495 | SequenceI seq, java.io.PrintStream os) throws SiftsException | |
496 | { | |
497 | 0 | List<Integer> omitNonObserved = new ArrayList<>(); |
498 | 0 | int nonObservedShiftIndex = 0, pdbeNonObserved = 0; |
499 | // jalview.bin.Console.outPrintln("Generating mappings for : " + entityId); | |
500 | 0 | Entity entity = null; |
501 | 0 | entity = getEntityById(entityId); |
502 | 0 | String originalSeq = AlignSeq.extractGaps( |
503 | jalview.util.Comparison.GapChars, seq.getSequenceAsString()); | |
504 | 0 | HashMap<Integer, int[]> mapping = new HashMap<Integer, int[]>(); |
505 | 0 | DBRefEntryI sourceDBRef; |
506 | 0 | sourceDBRef = getValidSourceDBRef(seq); |
507 | // TODO ensure sequence start/end is in the same coordinate system and | |
508 | // consistent with the choosen sourceDBRef | |
509 | ||
510 | // set sequence coordinate system - default value is UniProt | |
511 | 0 | if (sourceDBRef.getSource().equalsIgnoreCase(DBRefSource.PDB)) |
512 | { | |
513 | 0 | seqCoordSys = CoordinateSys.PDB; |
514 | } | |
515 | ||
516 | 0 | HashSet<String> dbRefAccessionIdsString = new HashSet<String>(); |
517 | 0 | for (DBRefEntry dbref : seq.getDBRefs()) |
518 | { | |
519 | 0 | dbRefAccessionIdsString |
520 | .add(dbref.getAccessionId().toLowerCase(Locale.ROOT)); | |
521 | } | |
522 | 0 | dbRefAccessionIdsString |
523 | .add(sourceDBRef.getAccessionId().toLowerCase(Locale.ROOT)); | |
524 | ||
525 | 0 | curDBRefAccessionIdsString = dbRefAccessionIdsString; |
526 | 0 | curSourceDBRef = sourceDBRef.getAccessionId(); |
527 | ||
528 | 0 | TreeMap<Integer, String> resNumMap = new TreeMap<Integer, String>(); |
529 | 0 | List<Segment> segments = entity.getSegment(); |
530 | 0 | SegmentHelperPojo shp = new SegmentHelperPojo(seq, mapping, resNumMap, |
531 | omitNonObserved, nonObservedShiftIndex, pdbeNonObserved); | |
532 | 0 | processSegments(segments, shp); |
533 | 0 | try |
534 | { | |
535 | 0 | populateAtomPositions(entityId, mapping); |
536 | } catch (Exception e) | |
537 | { | |
538 | 0 | e.printStackTrace(); |
539 | } | |
540 | 0 | if (seqCoordSys == CoordinateSys.UNIPROT) |
541 | { | |
542 | 0 | padWithGaps(resNumMap, omitNonObserved); |
543 | } | |
544 | 0 | int seqStart = UNASSIGNED; |
545 | 0 | int seqEnd = UNASSIGNED; |
546 | 0 | int pdbStart = UNASSIGNED; |
547 | 0 | int pdbEnd = UNASSIGNED; |
548 | ||
549 | 0 | if (mapping.isEmpty()) |
550 | { | |
551 | 0 | throw new SiftsException("SIFTS mapping failed for " + entityId |
552 | + " and " + seq.getName()); | |
553 | } | |
554 | // also construct a mapping object between the seq-coord sys and the PDB | |
555 | // seq's coord sys | |
556 | ||
557 | 0 | Integer[] keys = mapping.keySet().toArray(new Integer[0]); |
558 | 0 | Arrays.sort(keys); |
559 | 0 | seqStart = keys[0]; |
560 | 0 | seqEnd = keys[keys.length - 1]; |
561 | 0 | List<int[]> from = new ArrayList<>(), to = new ArrayList<>(); |
562 | 0 | int[] _cfrom = null, _cto = null; |
563 | 0 | String matchedSeq = originalSeq; |
564 | 0 | if (seqStart != UNASSIGNED) // fixme! seqStart can map to -1 for a pdb |
565 | // sequence that starts <-1 | |
566 | { | |
567 | 0 | for (int seqps : keys) |
568 | { | |
569 | 0 | int pdbpos = mapping.get(seqps)[PDBE_POS]; |
570 | 0 | if (pdbpos == UNASSIGNED) |
571 | { | |
572 | // not correct - pdbpos might be -1, but leave it for now | |
573 | 0 | continue; |
574 | } | |
575 | 0 | if (_cfrom == null || seqps != _cfrom[1] + 1) |
576 | { | |
577 | 0 | _cfrom = new int[] { seqps, seqps }; |
578 | 0 | from.add(_cfrom); |
579 | 0 | _cto = null; // discontinuity |
580 | } | |
581 | else | |
582 | { | |
583 | 0 | _cfrom[1] = seqps; |
584 | } | |
585 | 0 | if (_cto == null || pdbpos != 1 + _cto[1]) |
586 | { | |
587 | 0 | _cto = new int[] { pdbpos, pdbpos }; |
588 | 0 | to.add(_cto); |
589 | } | |
590 | else | |
591 | { | |
592 | 0 | _cto[1] = pdbpos; |
593 | } | |
594 | } | |
595 | 0 | _cfrom = new int[from.size() * 2]; |
596 | 0 | _cto = new int[to.size() * 2]; |
597 | 0 | int p = 0; |
598 | 0 | for (int[] range : from) |
599 | { | |
600 | 0 | _cfrom[p++] = range[0]; |
601 | 0 | _cfrom[p++] = range[1]; |
602 | } | |
603 | 0 | ; |
604 | 0 | p = 0; |
605 | 0 | for (int[] range : to) |
606 | { | |
607 | 0 | _cto[p++] = range[0]; |
608 | 0 | _cto[p++] = range[1]; |
609 | } | |
610 | 0 | ; |
611 | ||
612 | 0 | seqFromPdbMapping = new jalview.datamodel.Mapping(null, _cto, _cfrom, |
613 | 1, 1); | |
614 | 0 | pdbStart = mapping.get(seqStart)[PDB_RES_POS]; |
615 | 0 | pdbEnd = mapping.get(seqEnd)[PDB_RES_POS]; |
616 | 0 | int orignalSeqStart = seq.getStart(); |
617 | 0 | if (orignalSeqStart >= 1) |
618 | { | |
619 | 0 | int subSeqStart = (seqStart >= orignalSeqStart) |
620 | ? seqStart - orignalSeqStart | |
621 | : 0; | |
622 | 0 | int subSeqEnd = seqEnd - (orignalSeqStart - 1); |
623 | 0 | subSeqEnd = originalSeq.length() < subSeqEnd ? originalSeq.length() |
624 | : subSeqEnd; | |
625 | 0 | matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd); |
626 | } | |
627 | else | |
628 | { | |
629 | 0 | matchedSeq = originalSeq.substring(1, originalSeq.length()); |
630 | } | |
631 | } | |
632 | ||
633 | 0 | StringBuilder targetStrucSeqs = new StringBuilder(); |
634 | 0 | for (String res : resNumMap.values()) |
635 | { | |
636 | 0 | targetStrucSeqs.append(res); |
637 | } | |
638 | ||
639 | 0 | if (os != null) |
640 | { | |
641 | 0 | MappingOutputPojo mop = new MappingOutputPojo(); |
642 | 0 | mop.setSeqStart(seqStart); |
643 | 0 | mop.setSeqEnd(seqEnd); |
644 | 0 | mop.setSeqName(seq.getName()); |
645 | 0 | mop.setSeqResidue(matchedSeq); |
646 | ||
647 | 0 | mop.setStrStart(pdbStart); |
648 | 0 | mop.setStrEnd(pdbEnd); |
649 | 0 | mop.setStrName(structId); |
650 | 0 | mop.setStrResidue(targetStrucSeqs.toString()); |
651 | ||
652 | 0 | mop.setType("pep"); |
653 | 0 | os.print(getMappingOutput(mop).toString()); |
654 | 0 | os.println(); |
655 | } | |
656 | 0 | return mapping; |
657 | } | |
658 | ||
659 | 0 | void processSegments(List<Segment> segments, SegmentHelperPojo shp) |
660 | { | |
661 | 0 | SequenceI seq = shp.getSeq(); |
662 | 0 | HashMap<Integer, int[]> mapping = shp.getMapping(); |
663 | 0 | TreeMap<Integer, String> resNumMap = shp.getResNumMap(); |
664 | 0 | List<Integer> omitNonObserved = shp.getOmitNonObserved(); |
665 | 0 | int nonObservedShiftIndex = shp.getNonObservedShiftIndex(); |
666 | 0 | int pdbeNonObservedCount = shp.getPdbeNonObserved(); |
667 | 0 | int firstPDBResNum = UNASSIGNED; |
668 | 0 | for (Segment segment : segments) |
669 | { | |
670 | // jalview.bin.Console.outPrintln("Mapping segments : " + | |
671 | // segment.getSegId() + "\\"s | |
672 | // + segStartEnd); | |
673 | 0 | List<Residue> residues = segment.getListResidue().getResidue(); |
674 | 0 | for (Residue residue : residues) |
675 | { | |
676 | 0 | boolean isObserved = isResidueObserved(residue); |
677 | 0 | int pdbeIndex = getLeadingIntegerValue(residue.getDbResNum(), |
678 | UNASSIGNED); | |
679 | 0 | int currSeqIndex = UNASSIGNED; |
680 | 0 | List<CrossRefDb> cRefDbs = residue.getCrossRefDb(); |
681 | 0 | CrossRefDb pdbRefDb = null; |
682 | 0 | for (CrossRefDb cRefDb : cRefDbs) |
683 | { | |
684 | 0 | if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB)) |
685 | { | |
686 | 0 | pdbRefDb = cRefDb; |
687 | 0 | if (firstPDBResNum == UNASSIGNED) |
688 | { | |
689 | 0 | firstPDBResNum = getLeadingIntegerValue(cRefDb.getDbResNum(), |
690 | UNASSIGNED); | |
691 | } | |
692 | else | |
693 | { | |
694 | 0 | if (isObserved) |
695 | { | |
696 | // after we find the first observed residue we just increment | |
697 | 0 | firstPDBResNum++; |
698 | } | |
699 | } | |
700 | } | |
701 | 0 | if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys.getName()) |
702 | && isAccessionMatched(cRefDb.getDbAccessionId())) | |
703 | { | |
704 | 0 | currSeqIndex = getLeadingIntegerValue(cRefDb.getDbResNum(), |
705 | UNASSIGNED); | |
706 | 0 | if (pdbRefDb != null) |
707 | { | |
708 | 0 | break;// exit loop if pdb and uniprot are already found |
709 | } | |
710 | } | |
711 | } | |
712 | 0 | if (!isObserved) |
713 | { | |
714 | 0 | ++pdbeNonObservedCount; |
715 | } | |
716 | 0 | if (seqCoordSys == seqCoordSys.PDB) // FIXME: is seqCoordSys ever PDBe |
717 | // ??? | |
718 | { | |
719 | // if the sequence has a primary reference to the PDB, then we are | |
720 | // dealing with a sequence extracted directly from the PDB. In that | |
721 | // case, numbering is PDBe - non-observed residues | |
722 | 0 | currSeqIndex = seq.getStart() - 1 + pdbeIndex; |
723 | } | |
724 | 0 | if (!isObserved) |
725 | { | |
726 | 0 | if (seqCoordSys != CoordinateSys.UNIPROT) // FIXME: PDB or PDBe only |
727 | // here | |
728 | { | |
729 | // mapping to PDB or PDBe so we need to bookkeep for the | |
730 | // non-observed | |
731 | // SEQRES positions | |
732 | 0 | omitNonObserved.add(currSeqIndex); |
733 | 0 | ++nonObservedShiftIndex; |
734 | } | |
735 | } | |
736 | 0 | if (currSeqIndex == UNASSIGNED) |
737 | { | |
738 | // change in logic - unobserved residues with no currSeqIndex | |
739 | // corresponding are still counted in both nonObservedShiftIndex and | |
740 | // pdbeIndex... | |
741 | 0 | continue; |
742 | } | |
743 | // if (currSeqIndex >= seq.getStart() && currSeqIndex <= seqlength) // | |
744 | // true | |
745 | // numbering | |
746 | // is | |
747 | // not | |
748 | // up | |
749 | // to | |
750 | // seq.getEnd() | |
751 | { | |
752 | ||
753 | 0 | int resNum = (pdbRefDb == null) |
754 | ? getLeadingIntegerValue(residue.getDbResNum(), | |
755 | UNASSIGNED) | |
756 | : getLeadingIntegerValue(pdbRefDb.getDbResNum(), | |
757 | UNASSIGNED); | |
758 | ||
759 | 0 | if (isObserved) |
760 | { | |
761 | 0 | char resCharCode = ResidueProperties |
762 | .getSingleCharacterCode(ResidueProperties | |
763 | .getCanonicalAminoAcid(residue.getDbResName())); | |
764 | 0 | resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); |
765 | ||
766 | 0 | int[] mappingcols = new int[] { Integer.valueOf(resNum), |
767 | 0 | UNASSIGNED, isObserved ? firstPDBResNum : UNASSIGNED }; |
768 | ||
769 | 0 | mapping.put(currSeqIndex - nonObservedShiftIndex, mappingcols); |
770 | } | |
771 | } | |
772 | } | |
773 | } | |
774 | } | |
775 | ||
776 | /** | |
777 | * Get the leading integer part of a string that begins with an integer. | |
778 | * | |
779 | * @param input | |
780 | * - the string input to process | |
781 | * @param failValue | |
782 | * - value returned if unsuccessful | |
783 | * @return | |
784 | */ | |
785 | 0 | static int getLeadingIntegerValue(String input, int failValue) |
786 | { | |
787 | 0 | if (input == null) |
788 | { | |
789 | 0 | return failValue; |
790 | } | |
791 | 0 | String[] parts = input.split("(?=\\D)(?<=\\d)"); |
792 | 0 | if (parts != null && parts.length > 0 && parts[0].matches("[0-9]+")) |
793 | { | |
794 | 0 | return Integer.valueOf(parts[0]); |
795 | } | |
796 | 0 | return failValue; |
797 | } | |
798 | ||
799 | /** | |
800 | * | |
801 | * @param chainId | |
802 | * Target chain to populate mapping of its atom positions. | |
803 | * @param mapping | |
804 | * Two dimension array of residue index versus atom position | |
805 | * @throws IllegalArgumentException | |
806 | * Thrown if chainId or mapping is null | |
807 | * @throws SiftsException | |
808 | */ | |
809 | 0 | void populateAtomPositions(String chainId, Map<Integer, int[]> mapping) |
810 | throws IllegalArgumentException, SiftsException | |
811 | { | |
812 | 0 | try |
813 | { | |
814 | 0 | PDBChain chain = pdb.findChain(chainId); |
815 | ||
816 | 0 | if (chain == null || mapping == null) |
817 | { | |
818 | 0 | throw new IllegalArgumentException( |
819 | "Chain id or mapping must not be null."); | |
820 | } | |
821 | 0 | for (int[] map : mapping.values()) |
822 | { | |
823 | 0 | if (map[PDB_RES_POS] != UNASSIGNED) |
824 | { | |
825 | 0 | map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms); |
826 | } | |
827 | } | |
828 | } catch (NullPointerException e) | |
829 | { | |
830 | 0 | throw new SiftsException(e.getMessage()); |
831 | } catch (Exception e) | |
832 | { | |
833 | 0 | throw new SiftsException(e.getMessage()); |
834 | } | |
835 | } | |
836 | ||
837 | /** | |
838 | * | |
839 | * @param residueIndex | |
840 | * The residue index used for the search | |
841 | * @param atoms | |
842 | * A collection of Atom to search | |
843 | * @return atom position for the given residue index | |
844 | */ | |
845 | 0 | int getAtomIndex(int residueIndex, Collection<Atom> atoms) |
846 | { | |
847 | 0 | if (atoms == null) |
848 | { | |
849 | 0 | throw new IllegalArgumentException( |
850 | "atoms collection must not be null!"); | |
851 | } | |
852 | 0 | for (Atom atom : atoms) |
853 | { | |
854 | 0 | if (atom.resNumber == residueIndex) |
855 | { | |
856 | 0 | return atom.atomIndex; |
857 | } | |
858 | } | |
859 | 0 | return UNASSIGNED; |
860 | } | |
861 | ||
862 | /** | |
863 | * Checks if the residue instance is marked 'Not_observed' or not | |
864 | * | |
865 | * @param residue | |
866 | * @return | |
867 | */ | |
868 | 0 | private boolean isResidueObserved(Residue residue) |
869 | { | |
870 | 0 | Set<String> annotations = getResidueAnnotaitons(residue, |
871 | ResidueDetailType.ANNOTATION); | |
872 | 0 | if (annotations == null || annotations.isEmpty()) |
873 | { | |
874 | 0 | return true; |
875 | } | |
876 | 0 | for (String annotation : annotations) |
877 | { | |
878 | 0 | if (annotation.equalsIgnoreCase(NOT_OBSERVED)) |
879 | { | |
880 | 0 | return false; |
881 | } | |
882 | } | |
883 | 0 | return true; |
884 | } | |
885 | ||
886 | /** | |
887 | * Get annotation String for a given residue and annotation type | |
888 | * | |
889 | * @param residue | |
890 | * @param type | |
891 | * @return | |
892 | */ | |
893 | 0 | private Set<String> getResidueAnnotaitons(Residue residue, |
894 | ResidueDetailType type) | |
895 | { | |
896 | 0 | HashSet<String> foundAnnotations = new HashSet<String>(); |
897 | 0 | List<ResidueDetail> resDetails = residue.getResidueDetail(); |
898 | 0 | for (ResidueDetail resDetail : resDetails) |
899 | { | |
900 | 0 | if (resDetail.getProperty().equalsIgnoreCase(type.getCode())) |
901 | { | |
902 | 0 | foundAnnotations.add(resDetail.getContent()); |
903 | } | |
904 | } | |
905 | 0 | return foundAnnotations; |
906 | } | |
907 | ||
908 | 0 | @Override |
909 | public boolean isAccessionMatched(String accession) | |
910 | { | |
911 | 0 | boolean isStrictMatch = true; |
912 | 0 | return isStrictMatch ? curSourceDBRef.equalsIgnoreCase(accession) |
913 | : curDBRefAccessionIdsString | |
914 | .contains(accession.toLowerCase(Locale.ROOT)); | |
915 | } | |
916 | ||
917 | 0 | private boolean isFoundInSiftsEntry(String accessionId) |
918 | { | |
919 | 0 | Set<String> siftsDBRefs = getAllMappingAccession(); |
920 | 0 | return accessionId != null |
921 | && siftsDBRefs.contains(accessionId.toLowerCase(Locale.ROOT)); | |
922 | } | |
923 | ||
924 | /** | |
925 | * Pad omitted residue positions in PDB sequence with gaps | |
926 | * | |
927 | * @param resNumMap | |
928 | */ | |
929 | 0 | void padWithGaps(Map<Integer, String> resNumMap, |
930 | List<Integer> omitNonObserved) | |
931 | { | |
932 | 0 | if (resNumMap == null || resNumMap.isEmpty()) |
933 | { | |
934 | 0 | return; |
935 | } | |
936 | 0 | Integer[] keys = resNumMap.keySet().toArray(new Integer[0]); |
937 | // Arrays.sort(keys); | |
938 | 0 | int firstIndex = keys[0]; |
939 | 0 | int lastIndex = keys[keys.length - 1]; |
940 | // jalview.bin.Console.outPrintln("Min value " + firstIndex); | |
941 | // jalview.bin.Console.outPrintln("Max value " + lastIndex); | |
942 | 0 | for (int x = firstIndex; x <= lastIndex; x++) |
943 | { | |
944 | 0 | if (!resNumMap.containsKey(x) && !omitNonObserved.contains(x)) |
945 | { | |
946 | 0 | resNumMap.put(x, "-"); |
947 | } | |
948 | } | |
949 | } | |
950 | ||
951 | 0 | @Override |
952 | public Entity getEntityById(String id) throws SiftsException | |
953 | { | |
954 | // Determines an entity to process by performing a heuristic matching of all | |
955 | // Entities with the given chainId and choosing the best matching Entity | |
956 | 0 | Entity entity = getEntityByMostOptimalMatchedId(id); |
957 | 0 | if (entity != null) |
958 | { | |
959 | 0 | return entity; |
960 | } | |
961 | 0 | throw new SiftsException("Entity " + id + " not found"); |
962 | } | |
963 | ||
964 | /** | |
965 | * This method was added because EntityId is NOT always equal to ChainId. | |
966 | * Hence, it provides the logic to greedily detect the "true" Entity for a | |
967 | * given chainId where discrepancies exist. | |
968 | * | |
969 | * @param chainId | |
970 | * @return | |
971 | */ | |
972 | 0 | public Entity getEntityByMostOptimalMatchedId(String chainId) |
973 | { | |
974 | // jalview.bin.Console.outPrintln("---> advanced greedy entityId matching | |
975 | // block | |
976 | // entered.."); | |
977 | 0 | List<Entity> entities = siftsEntry.getEntity(); |
978 | 0 | SiftsEntitySortPojo[] sPojo = new SiftsEntitySortPojo[entities.size()]; |
979 | 0 | int count = 0; |
980 | 0 | for (Entity entity : entities) |
981 | { | |
982 | 0 | sPojo[count] = new SiftsEntitySortPojo(); |
983 | 0 | sPojo[count].entityId = entity.getEntityId(); |
984 | ||
985 | 0 | List<Segment> segments = entity.getSegment(); |
986 | 0 | for (Segment segment : segments) |
987 | { | |
988 | 0 | List<Residue> residues = segment.getListResidue().getResidue(); |
989 | 0 | for (Residue residue : residues) |
990 | { | |
991 | 0 | List<CrossRefDb> cRefDbs = residue.getCrossRefDb(); |
992 | 0 | for (CrossRefDb cRefDb : cRefDbs) |
993 | { | |
994 | 0 | if (!cRefDb.getDbSource().equalsIgnoreCase("PDB")) |
995 | { | |
996 | 0 | continue; |
997 | } | |
998 | 0 | ++sPojo[count].resCount; |
999 | 0 | if (cRefDb.getDbChainId().equalsIgnoreCase(chainId)) |
1000 | { | |
1001 | 0 | ++sPojo[count].chainIdFreq; |
1002 | } | |
1003 | } | |
1004 | } | |
1005 | } | |
1006 | 0 | sPojo[count].pid = (100 * sPojo[count].chainIdFreq) |
1007 | / sPojo[count].resCount; | |
1008 | 0 | ++count; |
1009 | } | |
1010 | 0 | Arrays.sort(sPojo, Collections.reverseOrder()); |
1011 | // jalview.bin.Console.outPrintln("highest matched entity : " + | |
1012 | // sPojo[0].entityId); | |
1013 | // jalview.bin.Console.outPrintln("highest matched pid : " + sPojo[0].pid); | |
1014 | ||
1015 | 0 | if (sPojo[0].entityId != null) |
1016 | { | |
1017 | 0 | if (sPojo[0].pid < 1) |
1018 | { | |
1019 | 0 | return null; |
1020 | } | |
1021 | 0 | for (Entity entity : entities) |
1022 | { | |
1023 | 0 | if (!entity.getEntityId().equalsIgnoreCase(sPojo[0].entityId)) |
1024 | { | |
1025 | 0 | continue; |
1026 | } | |
1027 | 0 | return entity; |
1028 | } | |
1029 | } | |
1030 | 0 | return null; |
1031 | } | |
1032 | ||
1033 | private class SiftsEntitySortPojo | |
1034 | implements Comparable<SiftsEntitySortPojo> | |
1035 | { | |
1036 | public String entityId; | |
1037 | ||
1038 | public int chainIdFreq; | |
1039 | ||
1040 | public int pid; | |
1041 | ||
1042 | public int resCount; | |
1043 | ||
1044 | 0 | @Override |
1045 | public int compareTo(SiftsEntitySortPojo o) | |
1046 | { | |
1047 | 0 | return this.pid - o.pid; |
1048 | } | |
1049 | } | |
1050 | ||
1051 | private class SegmentHelperPojo | |
1052 | { | |
1053 | private SequenceI seq; | |
1054 | ||
1055 | private HashMap<Integer, int[]> mapping; | |
1056 | ||
1057 | private TreeMap<Integer, String> resNumMap; | |
1058 | ||
1059 | private List<Integer> omitNonObserved; | |
1060 | ||
1061 | private int nonObservedShiftIndex; | |
1062 | ||
1063 | /** | |
1064 | * count of number of 'not observed' positions in the PDB record's SEQRES | |
1065 | * (total number of residues with coordinates == length(SEQRES) - | |
1066 | * pdbeNonObserved | |
1067 | */ | |
1068 | private int pdbeNonObserved; | |
1069 | ||
1070 | 0 | public SegmentHelperPojo(SequenceI seq, HashMap<Integer, int[]> mapping, |
1071 | TreeMap<Integer, String> resNumMap, | |
1072 | List<Integer> omitNonObserved, int nonObservedShiftIndex, | |
1073 | int pdbeNonObserved) | |
1074 | { | |
1075 | 0 | setSeq(seq); |
1076 | 0 | setMapping(mapping); |
1077 | 0 | setResNumMap(resNumMap); |
1078 | 0 | setOmitNonObserved(omitNonObserved); |
1079 | 0 | setNonObservedShiftIndex(nonObservedShiftIndex); |
1080 | 0 | setPdbeNonObserved(pdbeNonObserved); |
1081 | ||
1082 | } | |
1083 | ||
1084 | 0 | public void setPdbeNonObserved(int pdbeNonObserved2) |
1085 | { | |
1086 | 0 | this.pdbeNonObserved = pdbeNonObserved2; |
1087 | } | |
1088 | ||
1089 | 0 | public int getPdbeNonObserved() |
1090 | { | |
1091 | 0 | return pdbeNonObserved; |
1092 | } | |
1093 | ||
1094 | 0 | public SequenceI getSeq() |
1095 | { | |
1096 | 0 | return seq; |
1097 | } | |
1098 | ||
1099 | 0 | public void setSeq(SequenceI seq) |
1100 | { | |
1101 | 0 | this.seq = seq; |
1102 | } | |
1103 | ||
1104 | 0 | public HashMap<Integer, int[]> getMapping() |
1105 | { | |
1106 | 0 | return mapping; |
1107 | } | |
1108 | ||
1109 | 0 | public void setMapping(HashMap<Integer, int[]> mapping) |
1110 | { | |
1111 | 0 | this.mapping = mapping; |
1112 | } | |
1113 | ||
1114 | 0 | public TreeMap<Integer, String> getResNumMap() |
1115 | { | |
1116 | 0 | return resNumMap; |
1117 | } | |
1118 | ||
1119 | 0 | public void setResNumMap(TreeMap<Integer, String> resNumMap) |
1120 | { | |
1121 | 0 | this.resNumMap = resNumMap; |
1122 | } | |
1123 | ||
1124 | 0 | public List<Integer> getOmitNonObserved() |
1125 | { | |
1126 | 0 | return omitNonObserved; |
1127 | } | |
1128 | ||
1129 | 0 | public void setOmitNonObserved(List<Integer> omitNonObserved) |
1130 | { | |
1131 | 0 | this.omitNonObserved = omitNonObserved; |
1132 | } | |
1133 | ||
1134 | 0 | public int getNonObservedShiftIndex() |
1135 | { | |
1136 | 0 | return nonObservedShiftIndex; |
1137 | } | |
1138 | ||
1139 | 0 | public void setNonObservedShiftIndex(int nonObservedShiftIndex) |
1140 | { | |
1141 | 0 | this.nonObservedShiftIndex = nonObservedShiftIndex; |
1142 | } | |
1143 | ||
1144 | } | |
1145 | ||
1146 | 0 | @Override |
1147 | public StringBuilder getMappingOutput(MappingOutputPojo mp) | |
1148 | throws SiftsException | |
1149 | { | |
1150 | 0 | String seqRes = mp.getSeqResidue(); |
1151 | 0 | String seqName = mp.getSeqName(); |
1152 | 0 | int sStart = mp.getSeqStart(); |
1153 | 0 | int sEnd = mp.getSeqEnd(); |
1154 | ||
1155 | 0 | String strRes = mp.getStrResidue(); |
1156 | 0 | String strName = mp.getStrName(); |
1157 | 0 | int pdbStart = mp.getStrStart(); |
1158 | 0 | int pdbEnd = mp.getStrEnd(); |
1159 | ||
1160 | 0 | String type = mp.getType(); |
1161 | ||
1162 | 0 | int maxid = (seqName.length() >= strName.length()) ? seqName.length() |
1163 | : strName.length(); | |
1164 | 0 | int len = 72 - maxid - 1; |
1165 | ||
1166 | 0 | int nochunks = ((seqRes.length()) / len) |
1167 | 0 | + ((seqRes.length()) % len > 0 ? 1 : 0); |
1168 | // output mappings | |
1169 | 0 | StringBuilder output = new StringBuilder(512); |
1170 | 0 | output.append(NEWLINE); |
1171 | 0 | output.append("Sequence \u27f7 Structure mapping details") |
1172 | .append(NEWLINE); | |
1173 | 0 | output.append("Method: SIFTS"); |
1174 | 0 | output.append(NEWLINE).append(NEWLINE); |
1175 | ||
1176 | 0 | output.append(new Format("%" + maxid + "s").form(seqName)); |
1177 | 0 | output.append(" : "); |
1178 | 0 | output.append(String.valueOf(sStart)); |
1179 | 0 | output.append(" - "); |
1180 | 0 | output.append(String.valueOf(sEnd)); |
1181 | 0 | output.append(" Maps to "); |
1182 | 0 | output.append(NEWLINE); |
1183 | 0 | output.append(new Format("%" + maxid + "s").form(structId)); |
1184 | 0 | output.append(" : "); |
1185 | 0 | output.append(String.valueOf(pdbStart)); |
1186 | 0 | output.append(" - "); |
1187 | 0 | output.append(String.valueOf(pdbEnd)); |
1188 | 0 | output.append(NEWLINE).append(NEWLINE); |
1189 | ||
1190 | 0 | ScoreMatrix pam250 = ScoreModels.getInstance().getPam250(); |
1191 | 0 | int matchedSeqCount = 0; |
1192 | 0 | for (int j = 0; j < nochunks; j++) |
1193 | { | |
1194 | // Print the first aligned sequence | |
1195 | 0 | output.append(new Format("%" + (maxid) + "s").form(seqName)) |
1196 | .append(" "); | |
1197 | ||
1198 | 0 | for (int i = 0; i < len; i++) |
1199 | { | |
1200 | 0 | if ((i + (j * len)) < seqRes.length()) |
1201 | { | |
1202 | 0 | output.append(seqRes.charAt(i + (j * len))); |
1203 | } | |
1204 | } | |
1205 | ||
1206 | 0 | output.append(NEWLINE); |
1207 | 0 | output.append(new Format("%" + (maxid) + "s").form(" ")).append(" "); |
1208 | ||
1209 | /* | |
1210 | * Print out the match symbols: | |
1211 | * | for exact match (ignoring case) | |
1212 | * . if PAM250 score is positive | |
1213 | * else a space | |
1214 | */ | |
1215 | 0 | for (int i = 0; i < len; i++) |
1216 | { | |
1217 | 0 | try |
1218 | { | |
1219 | 0 | if ((i + (j * len)) < seqRes.length()) |
1220 | { | |
1221 | 0 | char c1 = seqRes.charAt(i + (j * len)); |
1222 | 0 | char c2 = strRes.charAt(i + (j * len)); |
1223 | 0 | boolean sameChar = Comparison.isSameResidue(c1, c2, false); |
1224 | 0 | if (sameChar && !Comparison.isGap(c1)) |
1225 | { | |
1226 | 0 | matchedSeqCount++; |
1227 | 0 | output.append("|"); |
1228 | } | |
1229 | 0 | else if (type.equals("pep")) |
1230 | { | |
1231 | 0 | if (pam250.getPairwiseScore(c1, c2) > 0) |
1232 | { | |
1233 | 0 | output.append("."); |
1234 | } | |
1235 | else | |
1236 | { | |
1237 | 0 | output.append(" "); |
1238 | } | |
1239 | } | |
1240 | else | |
1241 | { | |
1242 | 0 | output.append(" "); |
1243 | } | |
1244 | } | |
1245 | } catch (IndexOutOfBoundsException e) | |
1246 | { | |
1247 | 0 | continue; |
1248 | } | |
1249 | } | |
1250 | // Now print the second aligned sequence | |
1251 | 0 | output = output.append(NEWLINE); |
1252 | 0 | output = output.append(new Format("%" + (maxid) + "s").form(strName)) |
1253 | .append(" "); | |
1254 | 0 | for (int i = 0; i < len; i++) |
1255 | { | |
1256 | 0 | if ((i + (j * len)) < strRes.length()) |
1257 | { | |
1258 | 0 | output.append(strRes.charAt(i + (j * len))); |
1259 | } | |
1260 | } | |
1261 | 0 | output.append(NEWLINE).append(NEWLINE); |
1262 | } | |
1263 | 0 | float pid = (float) matchedSeqCount / seqRes.length() * 100; |
1264 | 0 | if (pid < SiftsSettings.getFailSafePIDThreshold()) |
1265 | { | |
1266 | 0 | throw new SiftsException(">>> Low PID detected for SIFTs mapping..."); |
1267 | } | |
1268 | 0 | output.append("Length of alignment = " + seqRes.length()) |
1269 | .append(NEWLINE); | |
1270 | 0 | output.append(new Format("Percentage ID = %2.2f").form(pid)); |
1271 | 0 | return output; |
1272 | } | |
1273 | ||
1274 | 0 | @Override |
1275 | public int getEntityCount() | |
1276 | { | |
1277 | 0 | return siftsEntry.getEntity().size(); |
1278 | } | |
1279 | ||
1280 | 0 | @Override |
1281 | public String getDbAccessionId() | |
1282 | { | |
1283 | 0 | return siftsEntry.getDbAccessionId(); |
1284 | } | |
1285 | ||
1286 | 0 | @Override |
1287 | public String getDbCoordSys() | |
1288 | { | |
1289 | 0 | return siftsEntry.getDbCoordSys(); |
1290 | } | |
1291 | ||
1292 | 0 | @Override |
1293 | public String getDbSource() | |
1294 | { | |
1295 | 0 | return siftsEntry.getDbSource(); |
1296 | } | |
1297 | ||
1298 | 0 | @Override |
1299 | public String getDbVersion() | |
1300 | { | |
1301 | 0 | return siftsEntry.getDbVersion(); |
1302 | } | |
1303 | ||
1304 | 0 | public static void setMockSiftsFile(File file) |
1305 | { | |
1306 | 0 | mockSiftsFile = file; |
1307 | } | |
1308 | ||
1309 | } |