Changeset 1760:165df4199ec9 in orange-bioinformatics for _bioinformatics/obiKEGG/databases.py


Ignore:
Timestamp:
04/23/13 14:09:20 (12 months ago)
Author:
Ales Erjavec <ales.erjavec@…>
Branch:
default
Message:

Optimized DBDatabase batch_get/pre_cache w.r.t. number of separate network requests.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • _bioinformatics/obiKEGG/databases.py

    r1747 r1760  
    77 
    88import re 
     9from contextlib import closing 
    910 
    1011from . import entry 
     
    193194            keys = self.keys() 
    194195 
    195         keys = list(keys) 
     196        keys = map(self._add_db, keys) 
     197 
     198        get = self.api.get 
     199 
     200        # drop all keys with a valid cache entry to minimize the number 
     201        # of 'get' requests. 
     202        with closing(get.cache_store()) as store: 
     203            def is_uncached(key): 
     204                return not get.key_has_valid_cache(get.key_from_args((key,)), 
     205                                                   store) 
     206            keys = filter(is_uncached, keys) 
     207 
     208        start = 0 
     209 
     210        while start < len(keys): 
     211            batch = keys[start: start + batch_size] 
     212            self.api.get(batch) 
     213 
     214            if progress_callback: 
     215                progress_callback(100.0 * start / len(keys)) 
     216 
     217            start += batch_size 
     218 
     219    def batch_get(self, keys): 
     220        """ 
     221        Batch retrieve all entries for keys. This can be significantly 
    221        faster than getting each entry separately especially if entries 
     223        are not yet cached. 
     224 
     225        """ 
     226        entries = [] 
     227        batch_size = 10 
     228        keys = map(self._add_db, keys) 
     229 
     230        # Precache the entries first 
     231        self.pre_cache(keys) 
     232 
    196233        start = 0 
    197234        while start < len(keys): 
    198235            batch = keys[start: start + batch_size] 
    199             batch = map(self._add_db, batch) 
    200  
    201             self.api.get(batch) 
    202  
    203             if progress_callback: 
    204                 progress_callback(100.0 * start / len(keys)) 
    205  
    206             start += batch_size 
    207  
    208     def batch_get(self, keys): 
    209         """ 
    210         Batch retrieve all entries for keys. This can be significantly 
    211         faster than getting each entry separately especially if entries 
    212         are not yet cached. 
    213  
    214         """ 
    215         entries = [] 
    216         batch_size = 10 
    217         keys = list(keys) 
    218         start = 0 
    219         while start < len(keys): 
    220             batch = keys[start: start + batch_size] 
    221             batch = map(self._add_db, batch) 
    222236            batch_entries = self.api.get(batch) 
    223237            if batch_entries is not None: 
Note: See TracChangeset for help on using the changeset viewer.