Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
PDBStructureChooserQuerySource | 56 | 123 | 33 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.gui.structurechooser; | |
22 | ||
23 | import java.util.Locale; | |
24 | ||
25 | import java.util.ArrayList; | |
26 | import java.util.Collection; | |
27 | import java.util.HashSet; | |
28 | import java.util.LinkedHashSet; | |
29 | import java.util.List; | |
30 | import java.util.Objects; | |
31 | import java.util.Set; | |
32 | ||
33 | import javax.swing.JTable; | |
34 | import javax.swing.table.TableModel; | |
35 | ||
36 | import jalview.datamodel.DBRefEntry; | |
37 | import jalview.datamodel.DBRefSource; | |
38 | import jalview.datamodel.PDBEntry; | |
39 | import jalview.datamodel.SequenceI; | |
40 | import jalview.fts.api.FTSData; | |
41 | import jalview.fts.api.FTSDataColumnI; | |
42 | import jalview.fts.api.FTSRestClientI; | |
43 | import jalview.fts.core.FTSDataColumnPreferences; | |
44 | import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource; | |
45 | import jalview.fts.core.FTSRestRequest; | |
46 | import jalview.fts.core.FTSRestResponse; | |
47 | import jalview.fts.service.pdb.PDBFTSRestClient; | |
48 | import jalview.jbgui.FilterOption; | |
49 | import jalview.util.MessageManager; | |
50 | ||
51 | /** | |
52 | * logic for querying the PDBe API for structures of sequences | |
53 | * | |
54 | * @author jprocter | |
55 | */ | |
56 | public class PDBStructureChooserQuerySource | |
57 | extends StructureChooserQuerySource | |
58 | { | |
59 | ||
60 | private static int MAX_QLENGTH = 7820; | |
61 | ||
62 | protected FTSRestRequest lastPdbRequest; | |
63 | ||
64 | protected FTSRestClientI pdbRestClient; | |
65 | ||
/**
 * Creates a query source whose REST client is the instance returned by
 * PDBFTSRestClient.getInstance(), and whose column preferences are the
 * structure chooser preferences for that client.
 */
public PDBStructureChooserQuerySource()
{
  this.pdbRestClient = PDBFTSRestClient.getInstance();
  docFieldPrefs = new FTSDataColumnPreferences(
          PreferenceSource.STRUCTURE_CHOOSER,
          PDBFTSRestClient.getInstance());
}
74 | ||
75 | /** | |
76 | * Builds a query string for a given sequences using its DBRef entries | |
77 | * | |
78 | * @param seq | |
79 | * the sequences to build a query for | |
80 | * @return the built query string | |
81 | */ | |
82 | ||
83 | 6 | public String buildQuery(SequenceI seq) |
84 | { | |
85 | 6 | boolean isPDBRefsFound = false; |
86 | 6 | boolean isUniProtRefsFound = false; |
87 | 6 | StringBuilder queryBuilder = new StringBuilder(); |
88 | 6 | Set<String> seqRefs = new LinkedHashSet<>(); |
89 | ||
90 | /* | |
91 | * note PDBs as DBRefEntry so they are not duplicated in query | |
92 | */ | |
93 | 6 | Set<String> pdbids = new HashSet<>(); |
94 | ||
95 | 6 | if (seq.getAllPDBEntries() != null |
96 | && queryBuilder.length() < MAX_QLENGTH) | |
97 | { | |
98 | 6 | for (PDBEntry entry : seq.getAllPDBEntries()) |
99 | { | |
100 | 2 | if (isValidSeqName(entry.getId())) |
101 | { | |
102 | 2 | String id = entry.getId().toLowerCase(Locale.ROOT); |
103 | 2 | queryBuilder.append("pdb_id:").append(id).append(" OR "); |
104 | 2 | isPDBRefsFound = true; |
105 | 2 | pdbids.add(id); |
106 | } | |
107 | } | |
108 | } | |
109 | ||
110 | 6 | List<DBRefEntry> refs = seq.getDBRefs(); |
111 | 6 | if (refs != null && refs.size() != 0) |
112 | { | |
113 | 24 | for (int ib = 0, nb = refs.size(); ib < nb; ib++) |
114 | { | |
115 | 19 | DBRefEntry dbRef = refs.get(ib); |
116 | 19 | if (isValidSeqName(getDBRefId(dbRef)) |
117 | && queryBuilder.length() < MAX_QLENGTH) | |
118 | { | |
119 | 19 | if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT)) |
120 | { | |
121 | 2 | queryBuilder.append("uniprot_accession:") |
122 | .append(getDBRefId(dbRef)).append(" OR "); | |
123 | 2 | queryBuilder.append("uniprot_id:").append(getDBRefId(dbRef)) |
124 | .append(" OR "); | |
125 | 2 | isUniProtRefsFound = true; |
126 | } | |
127 | 17 | else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB)) |
128 | { | |
129 | ||
130 | 1 | String id = getDBRefId(dbRef).toLowerCase(Locale.ROOT); |
131 | 1 | if (!pdbids.contains(id)) |
132 | { | |
133 | 1 | queryBuilder.append("pdb_id:").append(id).append(" OR "); |
134 | 1 | isPDBRefsFound = true; |
135 | 1 | pdbids.add(id); |
136 | } | |
137 | } | |
138 | else | |
139 | { | |
140 | 16 | seqRefs.add(getDBRefId(dbRef)); |
141 | } | |
142 | } | |
143 | } | |
144 | } | |
145 | ||
146 | 6 | if (!isPDBRefsFound && !isUniProtRefsFound) |
147 | { | |
148 | 2 | String seqName = seq.getName(); |
149 | 2 | seqName = sanitizeSeqName(seqName); |
150 | 2 | String[] names = seqName.toLowerCase(Locale.ROOT).split("\\|"); |
151 | 2 | for (String name : names) |
152 | { | |
153 | // jalview.bin.Console.outPrintln("Found name : " + name); | |
154 | 8 | name.trim(); |
155 | 8 | if (isValidSeqName(name)) |
156 | { | |
157 | 4 | seqRefs.add(name); |
158 | } | |
159 | } | |
160 | ||
161 | 2 | for (String seqRef : seqRefs) |
162 | { | |
163 | 6 | queryBuilder.append("text:").append(seqRef).append(" OR "); |
164 | } | |
165 | } | |
166 | ||
167 | 6 | int endIndex = queryBuilder.lastIndexOf(" OR "); |
168 | 6 | if (queryBuilder.toString().length() < 6) |
169 | { | |
170 | 0 | return null; |
171 | } | |
172 | 6 | String query = queryBuilder.toString().substring(0, endIndex); |
173 | 6 | return query; |
174 | } | |
175 | ||
176 | /** | |
177 | * Remove the following special characters from input string +, -, &, !, (, ), | |
178 | * {, }, [, ], ^, ", ~, *, ?, :, \ | |
179 | * | |
180 | * @param seqName | |
181 | * @return | |
182 | */ | |
183 | 10 | public static String sanitizeSeqName(String seqName) |
184 | { | |
185 | 10 | Objects.requireNonNull(seqName); |
186 | 10 | return seqName.replaceAll("\\[\\d*\\]", "") |
187 | .replaceAll("[^\\dA-Za-z|_]", "").replaceAll("\\s+", "+"); | |
188 | } | |
189 | ||
190 | /** | |
191 | * Ensures sequence ref names are not less than 3 characters and does not | |
192 | * contain a database name | |
193 | * | |
194 | * @param seqName | |
195 | * @return | |
196 | */ | |
197 | 29 | static boolean isValidSeqName(String seqName) |
198 | { | |
199 | // jalview.bin.Console.outPrintln("seqName : " + seqName); | |
200 | 29 | String ignoreList = "pdb,uniprot,swiss-prot"; |
201 | 29 | if (seqName.length() < 3) |
202 | { | |
203 | 2 | return false; |
204 | } | |
205 | 27 | if (seqName.contains(":")) |
206 | { | |
207 | 0 | return false; |
208 | } | |
209 | 27 | seqName = seqName.toLowerCase(Locale.ROOT); |
210 | 27 | for (String ignoredEntry : ignoreList.split(",")) |
211 | { | |
212 | 77 | if (seqName.contains(ignoredEntry)) |
213 | { | |
214 | 2 | return false; |
215 | } | |
216 | } | |
217 | 25 | return true; |
218 | } | |
219 | ||
220 | 40 | static String getDBRefId(DBRefEntry dbRef) |
221 | { | |
222 | 40 | String ref = dbRef.getAccessionId().replaceAll("GO:", ""); |
223 | 40 | return ref; |
224 | } | |
225 | ||
226 | /** | |
227 | * FTSRestClient specific query builder to recover associated structure data | |
228 | * records for a sequence | |
229 | * | |
230 | * @param seq | |
231 | * - seq to generate a query for | |
232 | * @param wantedFields | |
233 | * - fields to retrieve | |
234 | * @param selectedFilterOpt | |
235 | * - criterion for ranking results (e.g. resolution) | |
236 | * @param b | |
237 | * - sort ascending or descending | |
238 | * @return | |
239 | * @throws Exception | |
240 | */ | |
241 | 2 | public FTSRestResponse fetchStructuresMetaData(SequenceI seq, |
242 | Collection<FTSDataColumnI> wantedFields, | |
243 | FilterOption selectedFilterOpt, boolean b) throws Exception | |
244 | { | |
245 | 2 | FTSRestResponse resultList; |
246 | 2 | FTSRestRequest pdbRequest = new FTSRestRequest(); |
247 | 2 | pdbRequest.setAllowEmptySeq(false); |
248 | 2 | pdbRequest.setResponseSize(500); |
249 | 2 | pdbRequest.setFieldToSearchBy("("); |
250 | 2 | pdbRequest.setFieldToSortBy(selectedFilterOpt.getValue(), b); |
251 | 2 | pdbRequest.setWantedFields(wantedFields); |
252 | 2 | pdbRequest.setSearchTerm(buildQuery(seq) + ")"); |
253 | 2 | pdbRequest.setAssociatedSequence(seq); |
254 | 2 | resultList = pdbRestClient.executeRequest(pdbRequest); |
255 | ||
256 | 0 | lastPdbRequest = pdbRequest; |
257 | 0 | return resultList; |
258 | } | |
259 | ||
260 | 11 | public List<FilterOption> getAvailableFilterOptions(String VIEWS_FILTER) |
261 | { | |
262 | 11 | List<FilterOption> filters = new ArrayList<FilterOption>(); |
263 | 11 | filters.add(new FilterOption( |
264 | "PDBe " + MessageManager.getString("label.best_quality"), | |
265 | "overall_quality", VIEWS_FILTER, false, this)); | |
266 | 11 | filters.add(new FilterOption( |
267 | "PDBe " + MessageManager.getString("label.best_resolution"), | |
268 | "resolution", VIEWS_FILTER, false, this)); | |
269 | 11 | filters.add(new FilterOption( |
270 | "PDBe " + MessageManager.getString("label.most_protein_chain"), | |
271 | "number_of_protein_chains", VIEWS_FILTER, false, this)); | |
272 | 11 | filters.add(new FilterOption( |
273 | "PDBe " + MessageManager | |
274 | .getString("label.most_bound_molecules"), | |
275 | "number_of_bound_molecules", VIEWS_FILTER, false, this)); | |
276 | 11 | filters.add(new FilterOption( |
277 | "PDBe " + MessageManager | |
278 | .getString("label.most_polymer_residues"), | |
279 | "number_of_polymer_residues", VIEWS_FILTER, true, this)); | |
280 | ||
281 | 11 | return filters; |
282 | } | |
283 | ||
284 | 0 | @Override |
285 | public boolean needsRefetch(FilterOption selectedFilterOpt) | |
286 | { | |
287 | // PDBe queries never need a refetch first | |
288 | 0 | return false; |
289 | } | |
290 | ||
291 | /** | |
292 | * FTSRestClient specific query builder to pick top ranked entry from a | |
293 | * fetchStructuresMetaData query | |
294 | * | |
295 | * @param seq | |
296 | * - seq to generate a query for | |
297 | * @param wantedFields | |
298 | * - fields to retrieve | |
299 | * @param selectedFilterOpt | |
300 | * - criterion for ranking results (e.g. resolution) | |
301 | * @param b | |
302 | * - sort ascending or descending | |
303 | * @return | |
304 | * @throws Exception | |
305 | */ | |
306 | 0 | public FTSRestResponse selectFirstRankedQuery(SequenceI seq, |
307 | Collection<FTSData> collectedResults, | |
308 | Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy, | |
309 | boolean b) throws Exception | |
310 | { | |
311 | ||
312 | 0 | FTSRestResponse resultList; |
313 | 0 | FTSRestRequest pdbRequest = new FTSRestRequest(); |
314 | 0 | if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage")) |
315 | { | |
316 | 0 | pdbRequest.setAllowEmptySeq(false); |
317 | 0 | pdbRequest.setResponseSize(1); |
318 | 0 | pdbRequest.setFieldToSearchBy("("); |
319 | 0 | pdbRequest.setSearchTerm(buildQuery(seq) + ")"); |
320 | 0 | pdbRequest.setWantedFields(wantedFields); |
321 | 0 | pdbRequest.setAssociatedSequence(seq); |
322 | 0 | pdbRequest.setFacet(true); |
323 | 0 | pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity"); |
324 | 0 | pdbRequest.setFacetPivotMinCount(1); |
325 | } | |
326 | else | |
327 | { | |
328 | 0 | pdbRequest.setAllowEmptySeq(false); |
329 | 0 | pdbRequest.setResponseSize(1); |
330 | 0 | pdbRequest.setFieldToSearchBy("("); |
331 | 0 | pdbRequest.setFieldToSortBy(fieldToFilterBy, b); |
332 | 0 | pdbRequest.setSearchTerm(buildQuery(seq) + ")"); |
333 | 0 | pdbRequest.setWantedFields(wantedFields); |
334 | 0 | pdbRequest.setAssociatedSequence(seq); |
335 | } | |
336 | 0 | resultList = pdbRestClient.executeRequest(pdbRequest); |
337 | ||
338 | 0 | lastPdbRequest = pdbRequest; |
339 | 0 | return resultList; |
340 | } | |
341 | ||
342 | 0 | @Override |
343 | public PDBEntry[] collectSelectedRows(JTable restable, int[] selectedRows, | |
344 | List<SequenceI> selectedSeqsToView) | |
345 | { | |
346 | 0 | int refSeqColIndex = restable.getColumn("Ref Sequence").getModelIndex(); |
347 | ||
348 | 0 | PDBEntry[] pdbEntriesToView = new PDBEntry[selectedRows.length]; |
349 | 0 | int count = 0; |
350 | 0 | int idColumnIndex = -1; |
351 | 0 | boolean fromTDB = true; |
352 | 0 | idColumnIndex = restable.getColumn("PDB Id").getModelIndex(); |
353 | ||
354 | 0 | for (int row : selectedRows) |
355 | { | |
356 | ||
357 | 0 | String pdbIdStr = restable.getValueAt(row, idColumnIndex).toString(); |
358 | 0 | SequenceI selectedSeq = (SequenceI) restable.getValueAt(row, |
359 | refSeqColIndex); | |
360 | 0 | selectedSeqsToView.add(selectedSeq); |
361 | 0 | PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr); |
362 | 0 | if (pdbEntry == null) |
363 | { | |
364 | 0 | pdbEntry = getFindEntry(pdbIdStr, selectedSeq.getAllPDBEntries()); |
365 | } | |
366 | ||
367 | 0 | if (pdbEntry == null) |
368 | { | |
369 | 0 | pdbEntry = new PDBEntry(); |
370 | 0 | pdbEntry.setId(pdbIdStr); |
371 | 0 | pdbEntry.setType(PDBEntry.Type.MMCIF); |
372 | 0 | selectedSeq.getDatasetSequence().addPDBId(pdbEntry); |
373 | } | |
374 | 0 | pdbEntriesToView[count++] = pdbEntry; |
375 | } | |
376 | 0 | return pdbEntriesToView; |
377 | } | |
378 | ||
379 | 0 | @Override |
380 | protected FTSRestRequest getLastFTSRequest() | |
381 | { | |
382 | 0 | return lastPdbRequest; |
383 | } | |
384 | ||
385 | 32 | public FTSRestResponse executePDBFTSRestRequest(FTSRestRequest pdbRequest) |
386 | throws Exception | |
387 | { | |
388 | 32 | return pdbRestClient.executeRequest(pdbRequest); |
389 | } | |
390 | ||
391 | } |