'''Caching utilities'''
import io
import os
import re
import six
import sys
import json
import time
import atexit
import inspect
import requests
import tempfile
import mimetypes
import subprocess   # nosec
import pandas as pd
import tornado.template
from threading import Thread
from six.moves.queue import Queue
from orderedattrdict import AttrDict
from tornado.concurrent import Future
from tornado.ioloop import IOLoop, PeriodicCallback
from gramex.config import app_log, merge, used_kwargs, CustomJSONDecoder, CustomJSONEncoder
from six.moves.urllib_parse import urlparse


MILLISECOND = 0.001     # in seconds
_opener_defaults = dict(mode='r', buffering=-1, encoding='utf-8', errors='strict',
                        newline=None, closefd=True)
_markdown_defaults = dict(output_format='html5', extensions=[
    'markdown.extensions.codehilite',
    'markdown.extensions.extra',
    'markdown.extensions.toc',
    'markdown.extensions.meta',
    'markdown.extensions.sane_lists',
    'markdown.extensions.smarty',
])
# A set of temporary files to delete on program exit
_TEMP_FILES = set()
_ID_CACHE = set()


def _delete_temp_files():
    for path in _TEMP_FILES:
        if os.path.exists(path):
            os.remove(path)


atexit.register(_delete_temp_files)


def hashfn(fn):
    '''Returns a unique hash value for the function.'''
    # id() returns a unique value for the lifetime of an object.
    # To ensure that the ID is not recycled, cache the object so it's never released.
    _ID_CACHE.add(fn)
    return id(fn)


def cache_key(*args):
    '''Converts arguments into a string suitable for use as a cache key'''
    return json.dumps(args, sort_keys=True, separators=(',', ':'))


def opener(callback, read=False, **open_kwargs):
    '''
    Converts any function that accepts a string or handle as its parameter into
    a function that takes the first parameter from a file path.

    Here are a few examples::

        jsonload = opener(json.load)
        jsonload('x.json')      # opens x.json and runs json.load(handle)
        gramex.cache.open('x.json', jsonload)   # Loads x.json, cached

        # The read=True parameter passes the contents (not the handle) to the function
        template = opener(string.Template, read=True)
        template('abc.txt').substitute(x=val)
        gramex.cache.open('abc.txt', template).substitute(x=val)

        # If read=True, callback may be None. The result of .read() is passed as-is
        text = opener(None, read=True)
        gramex.cache.open('abc.txt', text)

    Keyword arguments applicable for ``io.open`` are passed to ``io.open``. These
    default to ``io.open(mode='r', buffering=-1, encoding='utf-8',
    errors='strict', newline=None, closefd=True)``. All other arguments and
    keyword arguments are passed to the callback (e.g. to ``json.load``).

    When reading binary files, pass ``mode='rb', encoding=None, errors=None``.
    '''
    merge(open_kwargs, _opener_defaults, 'setdefault')
    if read:
        # Pass contents to callback
        def method(path, **kwargs):
            open_args = {key: kwargs.pop(key, val) for key, val in open_kwargs.items()}
            with io.open(path, **open_args) as handle:
                result = handle.read()
                return callback(result, **kwargs) if callable(callback) else result
    else:
        if not callable(callback):
            raise ValueError('opener callback %r is not a function' % callback)

        # Pass handle to callback
        def method(path, **kwargs):
            open_args = {key: kwargs.pop(key, val) for key, val in open_kwargs.items()}
            with io.open(path, **open_args) as handle:
                return callback(handle, **kwargs)
    return method
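

# A minimal sketch (not part of the original module): opener() turns an
# ordinary parser into a path-based loader. The lambda callback and the
# sample.txt path are illustrative assumptions.
def _example_opener():
    upper_text = opener(lambda text: text.upper(), read=True)
    return upper_text('sample.txt')     # Reads sample.txt as UTF-8, returns upper-cased text
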

@opener
def _markdown(handle, **kwargs):
    from markdown import markdown
    return markdown(handle.read(), **{k: kwargs.pop(k, v) for k, v in _markdown_defaults.items()})


@opener
def _yaml(handle, **kwargs):
    import yaml
    defaults = {'Loader': yaml.FullLoader}
    return yaml.load(handle.read(), **{k: kwargs.pop(k, v) for k, v in defaults.items()})


def _template(path, **kwargs):
    root, name = os.path.split(path)
    return tornado.template.Loader(root, **kwargs).load(name)


def stat(path):
    '''
    Returns a file status tuple - based on file last modified time and file size
    '''
    if os.path.exists(path):
        stat = os.stat(path)
        return (stat.st_mtime, stat.st_size)
    return (None, None)


def hashed(val):
    '''Return the hashed value of val. If not possible, return None'''
    try:
        hash(val)
        return val
    except TypeError:
        try:
            return json.dumps(val, sort_keys=True, separators=(',', ':'))
        except Exception:
            return None


# gramex.cache.open() stores its cache here.
# {(path, callback): {data: ..., stat: ...}}
_OPEN_CACHE = {}
_OPEN_CALLBACKS = dict(
    bin=opener(None, read=True, mode='rb', encoding=None, errors=None),
    txt=opener(None, read=True),
    text=opener(None, read=True),
    csv=pd.read_csv,
    excel=pd.read_excel,
    xls=pd.read_excel,
    xlsx=pd.read_excel,
    hdf=pd.read_hdf,
    h5=pd.read_hdf,
    html=pd.read_html,
    jsondata=pd.read_json,
    sas=pd.read_sas,
    stata=pd.read_stata,
    table=pd.read_table,
    parquet=pd.read_parquet,
    feather=pd.read_feather,
    md=_markdown,
    markdown=_markdown,
    tmpl=_template,
    template=_template,
    yml=_yaml,
    yaml=_yaml,
)


def open(path, callback=None, transform=None, rel=False, **kwargs):
    '''
    Reads a file, processes it via a callback, caches the result and returns it.
    When called again, returns the cached result unless the file has updated.

    By default, it determines the file type from the extension. For example::

        open('data.yaml')   # Loads a YAML file
        open('data.csv')    # Loads a CSV file

    The 2nd parameter (callback) accepts a predefined string that can be one of:

    - ``bin``: reads binary files using io.open
    - ``text`` or ``txt``: reads text files using io.open
    - ``yaml``: reads files using yaml.load via io.open
    - ``config``: reads files using :py:class:`gramex.config.PathConfig`.
      Same as ``yaml``, but allows ``import:`` and variable substitution.
    - ``json``: reads files using json.load via io.open
    - ``jsondata``: reads files using pd.read_json
    - ``template``: reads files using tornado.Template via io.open
    - ``markdown`` or ``md``: reads files using markdown.markdown via io.open
    - ``csv``, ``excel``, ``xls``, ``xlsx``, ``hdf``, ``h5``, ``html``, ``sas``,
      ``stata``, ``table``, ``parquet``, ``feather``: reads using Pandas
    - ``xml``, ``svg``, ``rss``, ``atom``: reads using lxml.etree

    For example::

        # Load data.yaml as YAML into an AttrDict
        open('data.yaml', 'yaml')

        # Load data.json as JSON into an AttrDict
        open('data.json', 'json', object_pairs_hook=AttrDict)

        # Load data.csv as CSV into a Pandas DataFrame
        open('data.csv', 'csv', encoding='cp1252')

    It can also be a function that accepts the filename and any other arguments::

        # Load data using a custom callback
        open('data.fmt', my_format_reader_function, arg='value')

    This is called as ``my_format_reader_function('data.fmt', arg='value')`` and
    cached. Future calls do not re-load and re-calculate this data.

    ``transform=`` is an optional function that processes the data returned by
    the callback. For example::

        # Returns the row count of the CSV file, updating it only when changed
        open('data.csv', 'csv', transform=lambda data: len(data))

        # After loading data.xlsx into a DataFrame, return the grouped result
        open('data.xlsx', 'xlsx', transform=lambda data: data.groupby('city')['sales'].sum())

    If ``transform=`` is not a callable, it is ignored.

    ``rel=True`` opens the path relative to the caller function's file path. If
    ``D:/app/calc.py`` calls ``open('data.csv', 'csv', rel=True)``, the path
    is replaced with ``D:/app/data.csv``.

    Any other keyword arguments are passed directly to the callback. If the
    callback is a predefined string and uses io.open, all arguments applicable
    to io.open are passed to io.open and the rest are passed to the callback.
    '''
    # Pass _reload_status = True for testing purposes. This returns a tuple:
    # (result, reloaded) instead of just the result.
    _reload_status = kwargs.pop('_reload_status', False)
    reloaded = False
    _cache = kwargs.pop('_cache', _OPEN_CACHE)

    # Get the parent frame's filename. Compute path relative to that.
    if rel:
        stack = inspect.getouterframes(inspect.currentframe(), 2)
        folder = os.path.dirname(os.path.abspath(stack[1][1]))
        path = os.path.join(folder, path)

    original_callback = callback
    if callback is None:
        callback = os.path.splitext(path)[-1][1:]
    callback_is_str = isinstance(callback, six.string_types)
    key = (
        path,
        original_callback if callback_is_str else id(callback),
        hashfn(transform),
        frozenset(((k, hashed(v)) for k, v in kwargs.items())),
    )
    cached = _cache.get(key, None)
    fstat = stat(path)
    if cached is None or fstat != cached.get('stat'):
        reloaded = True
        if callable(callback):
            data = callback(path, **kwargs)
        elif callback_is_str:
            method = None
            if callback in _OPEN_CALLBACKS:
                method = _OPEN_CALLBACKS[callback]
            elif callback in {'json'}:
                method = opener(json.load)
            elif callback in {'config'}:
                from gramex.config import PathConfig
                method = PathConfig
            elif callback in {'xml', 'svg', 'rss', 'atom'}:
                from lxml import etree
                method = etree.parse

            if method is not None:
                data = method(path, **kwargs)
            elif original_callback is None:
                raise TypeError('gramex.cache.open: path "%s" has unknown extension' % path)
            else:
                raise TypeError('gramex.cache.open(callback="%s") is not a known type' % callback)
        else:
            raise TypeError('gramex.cache.open(callback=) must be a function, not %r' % callback)
        if callable(transform):
            data = transform(data)
        _cache[key] = {'data': data, 'stat': fstat}

    result = _cache[key]['data']
    return (result, reloaded) if _reload_status else result
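

# A hedged usage sketch for open(): the second call returns the cached
# DataFrame unless data.csv changed on disk. data.csv is an assumed file;
# _reload_status is the testing flag documented above.
def _example_open():
    data, reloaded = open('data.csv', 'csv', _reload_status=True)   # First call: reloaded=True
    data, reloaded = open('data.csv', 'csv', _reload_status=True)   # Unchanged file: reloaded=False
    return data
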

def set_cache(cache, old_cache):
    '''
    Use ``cache`` as the new cache for all open requests.
    Copies keys from the old cache, and deletes them from the old cache.
    '''
    for key in list(old_cache.keys()):
        cache[key] = old_cache[key]
        del old_cache[key]
    return cache


_SAVE_CALLBACKS = dict(
    json='to_json',
    csv='to_csv',
    xlsx='to_excel',
    hdf='to_hdf',
    html='to_html',
    stata='to_stata',
    # Other configurations not supported
)


def save(data, url, callback=None, **kwargs):
    '''
    Saves a DataFrame to a file at url. It does not cache.

    ``callback`` is almost the same as for :py:func:`gramex.cache.open`. It can
    be ``json``, ``csv``, ``xlsx``, ``hdf``, ``html``, ``stata`` or
    a function that accepts the filename and any other arguments.

    Other keyword arguments are passed directly to the callback.
    '''
    if callback is None:
        callback = os.path.splitext(url)[-1][1:]
    if callable(callback):
        return callback(data, url, **kwargs)
    elif callback in _SAVE_CALLBACKS:
        method = getattr(data, _SAVE_CALLBACKS[callback])
        return method(url, **(used_kwargs(method, kwargs)[0]))
    else:
        raise TypeError('gramex.cache.save(callback="%s") is unknown' % callback)
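

# A small sketch of save(): writes a DataFrame based on the file extension.
# Keyword arguments the writer accepts (like index=) are forwarded to it;
# out.csv is an illustrative path.
def _example_save():
    df = pd.DataFrame({'x': [1, 2], 'y': [3, 4]})
    save(df, 'out.csv', index=False)    # Calls df.to_csv('out.csv', index=False)
    return open('out.csv', 'csv')       # Round-trip: re-read the saved file, cached
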

# gramex.cache.query() stores its cache here
_QUERY_CACHE = {}
_STATUS_METHODS = {}


def _wheres(dbkey, tablekey, default_db, names, fn=None):
    '''
    Convert a table name list like ``['sales', 'dept.sales']`` to a WHERE clause
    like ``(table="sales") OR (db="dept" AND table="sales")``.

    TODO: escape the table names to avoid SQL injection attacks
    '''
    where = []
    for name in names:
        db, table = name.rsplit('.', 1) if '.' in name else (default_db, name)
        if not fn:
            where.append("({}='{}' AND {}='{}')".format(dbkey, db, tablekey, table))
        else:
            where.append("({}={}('{}') AND {}={}('{}'))".format(
                dbkey, fn[0], db, tablekey, fn[1], table))
    return ' OR '.join(where)


def _table_status(engine, tables):
    '''
    Returns the last updated date of a list of tables.
    '''
    # Cache the SQL query or file date check function beforehand.
    # Every time this method is called with a URL and table list, run the cached query
    dialect = engine.dialect.name
    key = (engine.url, tuple(tables))
    db = engine.url.database
    if _STATUS_METHODS.get(key, None) is None:
        if len(tables) == 0:
            raise ValueError('gramex.cache.query table list is empty: %r' % (tables, ))
        for name in tables:
            if not name or not isinstance(name, six.string_types):
                raise ValueError('gramex.cache.query invalid table list: %r' % (tables, ))
        if dialect == 'mysql':
            # https://dev.mysql.com/doc/refman/5.7/en/tables-table.html
            # Works only on MySQL 5.7 and above
            q = ('SELECT update_time FROM information_schema.tables WHERE ' +
                 _wheres('table_schema', 'table_name', db, tables))
        elif dialect == 'mssql':
            # https://goo.gl/b4aL9m
            q = ('SELECT last_user_update FROM sys.dm_db_index_usage_stats WHERE ' +
                 _wheres('database_id', 'object_id', db, tables, fn=['DB_ID', 'OBJECT_ID']))
        elif dialect == 'postgresql':
            # https://www.postgresql.org/docs/9.6/static/monitoring-stats.html
            q = ('SELECT n_tup_ins, n_tup_upd, n_tup_del FROM pg_stat_all_tables WHERE ' +
                 _wheres('schemaname', 'relname', 'public', tables))
        elif dialect == 'sqlite':
            if not db:
                raise KeyError('gramex.cache.query does not support in-memory sqlite')
            q = db
        else:
            raise KeyError('gramex.cache.query cannot cache dialect "%s" yet' % dialect)
        if dialect == 'sqlite':
            _STATUS_METHODS[key] = lambda: stat(q)
        else:
            _STATUS_METHODS[key] = lambda: pd.read_sql(q, engine).to_json(orient='records')
    return _STATUS_METHODS[key]()


def query(sql, engine, state=None, **kwargs):
    '''
    Read a SQL query or database table into a DataFrame. Caches results unless
    the state has changed. It always re-runs the query unless state is specified.

    The state can be specified in 4 ways:

    1. A string. This must be a lightweight SQL query. If its result changes,
       the original SQL query is re-run.
    2. A function. This is called to determine the state of the database.
    3. A list of tables. This list of ["db.table"] names specifies which tables
       to watch. This is currently experimental.
    4. ``None``: the default. The query is always re-run and not cached.
    '''
    # Pass _reload_status = True for testing purposes. This returns a tuple:
    # (result, reloaded) instead of just the result.
    _reload_status = kwargs.pop('_reload_status', False)
    reloaded = False
    _cache = kwargs.pop('_cache', _QUERY_CACHE)
    store_cache = True

    key = (str(sql), json.dumps(kwargs.get('params', {}), sort_keys=True), engine.url)
    current_status = _cache.get(key, {}).get('status', None)
    if isinstance(state, (list, tuple)):
        status = _table_status(engine, tuple(state))
    elif isinstance(state, six.string_types):
        status = pd.read_sql(state, engine).to_dict(orient='list')
    elif callable(state):
        status = state()
    elif state is None:
        # Create a new status every time, so that the query is always re-run
        status = object()
        store_cache = False
    else:
        raise TypeError('gramex.cache.query(state=) must be a table list, query or fn, not %r' %
                        (state, ))

    if status == current_status:
        result = _cache[key]['data']
    else:
        app_log.debug('gramex.cache.query: %s. engine: %s. state: %s. kwargs: %s', sql, engine,
                      state, kwargs)
        result = pd.read_sql(sql, engine, **kwargs)
        if store_cache:
            _cache[key] = {
                'data': result,
                'status': status,
            }
        reloaded = True

    return (result, reloaded) if _reload_status else result
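

# A hedged sketch of query() with a table-list state. With a sqlite engine,
# the state check is just a stat() of the database file, so the SQL re-runs
# only when data.db changes. The engine URL and table name are assumptions.
def _example_query():
    import sqlalchemy
    engine = sqlalchemy.create_engine('sqlite:///data.db')
    return query('SELECT * FROM sales', engine, state=['sales'])
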

# gramex.cache.reload_module() stores its cache here. {module_name: file_stat}
_MODULE_CACHE = {}


def reload_module(*modules):
    '''
    Reloads one or more modules if they are outdated, i.e. only if the
    underlying source file has changed.

    For example::

        import mymodule             # Load cached module
        reload_module(mymodule)     # Reload module if the source has changed

    This is most useful during template development. If your changes are in a
    Python module, add these lines to pick up new module changes when
    the template is re-run.
    '''
    for module in modules:
        name = getattr(module, '__name__', None)
        path = getattr(module, '__file__', None)
        # sys.__file__ does not exist, but don't raise a warning. You can't reload it
        if name in {'sys'}:
            continue
        if name is None or path is None or not os.path.exists(path):
            app_log.warning('Path for module %s is %s: not found', name, path)
            continue
        # On Python 3, __file__ points to the .py file. In Python 2, it's the .pyc file
        # https://www.python.org/dev/peps/pep-3147/#file
        if path.lower().endswith('.pyc'):
            path = path[:-1]
            if not os.path.exists(path):
                app_log.warning('Path for module %s is %s: not found', name, path)
                continue
        # The first time, don't reload it. Thereafter, if it's older or resized, reload it
        fstat = stat(path)
        if fstat != _MODULE_CACHE.get(name, fstat):
            app_log.info('Reloading module %s', name)
            six.moves.reload_module(module)
        _MODULE_CACHE[name] = fstat
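

# A short sketch of the development loop reload_module() supports: re-import
# cheaply on every call, actually reloading only when the file changed.
# mymodule and its main() entry point are hypothetical, for illustration.
def _example_reload_module():
    import mymodule
    reload_module(mymodule)     # No-op unless mymodule's source file changed on disk
    return mymodule.main()
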

def urlfetch(path, info=False, **kwargs):
    '''
    - If path is a file path, return it as-is.
      If ``info=True``, return a dict with ``name`` (the file path),
      ``ext`` (extension) and ``content_type``, with ``r`` and ``url`` set to None.
    - If path is a URL, download the file and return the saved filename.
      The filename extension is based on the URL's Content-Type HTTP header.
      If ``info=True``, return a dict with ``name`` (the filename), ``r`` (the
      response), ``url``, ``ext`` (extension) and ``content_type``.
    - Any other keyword arguments are passed to requests.get.
    - Downloaded files are automatically deleted when the application exits.
    - This is a synchronous function, i.e. it waits until the file is downloaded.
    '''
    url = urlparse(path)
    if url.scheme not in {'http', 'https'}:     # path is a filepath
        if info:
            ext = os.path.splitext(path)[1]
            content_type = mimetypes.guess_type(path, strict=True)[0]
            return {'name': path, 'r': None, 'url': None, 'ext': ext, 'content_type': content_type}
        else:
            return path
    r = requests.get(path, **kwargs)
    if 'Content-Type' in r.headers:
        content_type = r.headers['Content-Type'].split(';')[0]
        ext = mimetypes.guess_extension(content_type, strict=False)
    else:
        ext = os.path.splitext(url.path)[1]
        content_type = None     # No Content-Type header: report None
    with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as handle:
        for chunk in r.iter_content(chunk_size=16384):
            handle.write(chunk)
    _TEMP_FILES.add(handle.name)
    if info:
        return {'name': handle.name, 'r': r, 'url': url, 'ext': ext, 'content_type': content_type}
    else:
        return handle.name
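

# A hedged sketch of urlfetch(): local paths pass through unchanged, while
# URLs are downloaded to a temp file that is deleted at exit. The URL below
# is an illustrative assumption.
def _example_urlfetch():
    local = urlfetch('data.csv')    # Not http(s): returned as-is
    meta = urlfetch('https://example.org/data.csv', info=True)
    return local, meta['name'], meta['content_type']
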

class Subprocess(object):
    '''
    tornado.process.Subprocess does not work on Windows.
    https://github.com/tornadoweb/tornado/issues/1585

    This is a threaded alternative based on
    http://stackoverflow.com/a/4896288/100904

    Run a program async and wait for it to execute. Then get its output::

        stdout, stderr = yield Subprocess(['ls', '-la']).wait_for_exit()

    Run a program async and send each line to the handler as it writes::

        yield Subprocess(
            ['ls', '-la'],                  # Run 'ls -la'
            buffer_size='line',             # Buffer output line by line
            stream_stdout=handler.write,    # Send output to handler.write(line)
            stream_stderr=handler.write,    # Send errors to handler.write(line)
        )

    Run a program async and append its output to a list::

        proc = Subprocess(
            ['ls', '-la'],
            buffer_size='line',
            stream_stdout='list_out',       # Append output to self.list_out
            stream_stderr='list_err',       # Append errors to self.list_err
        )
        output = proc.list_out[-10:]        # Return last 10 lines of output
        yield proc.wait_for_exit()          # Wait until application is done

    Run a program async and append its output to a queue::

        proc = Subprocess(
            ['ls', '-la'],                      # Run 'ls -la'
            buffer_size='line',                 # Buffer output line by line
            stream_stdout='queue_out',          # Save output in proc.queue_out queue
            stream_stderr='queue_err',          # Save errors in proc.queue_err queue
        )
        output = proc.queue_out.get_nowait()    # Returns first line of output
        yield proc.wait_for_exit()              # Wait until application is done

    To write to multiple streams, pass a list::

        proc = Subprocess(
            args,
            buffer_size='line',
            stream_stdout=[handler.write, 'list_out', 'queue_out', my_callback],
            stream_stderr=[handler.write, 'list_err', 'queue_err', my_callback],
            **kwargs
        )
        yield proc.wait_for_exit()

    To check the process return code, use ``.proc``, which holds the ``Popen``
    object::

        if proc.proc.returncode:
            raise Exception('Process failed with return code %d' % proc.proc.returncode)

    :arg list args: command line arguments passed as a list to Subprocess
    :arg methodlist stream_stdout: optional list of write methods - called when stdout has data
    :arg methodlist stream_stderr: optional list of write methods - called when stderr has data
    :arg str_or_int buffer_size: 'line' to write line by line, or any int for chunk size
    :arg dict kwargs: additional kwargs passed to subprocess.Popen

    stream_stdout and stream_stderr can be:

    - a function that accepts a byte string. Called as stdout/stderr are buffered
    - OR a string starting with ``list_`` or ``queue_``. Appends buffered output
    - OR a list of any of the above
    - OR an empty list. In this case, ``.wait_for_exit()`` returns a tuple with
      ``stdout`` and ``stderr`` as byte strings.
    '''

    def __init__(self, args, stream_stdout=[], stream_stderr=[], buffer_size=0, **kwargs):
        self.args = args

        # self.proc.stdout & self.proc.stderr are streams with process output
        kwargs['stdout'] = kwargs['stderr'] = subprocess.PIPE

        # On UNIX, close all file descriptors except 0, 1, 2 before child
        # process is executed. I've no idea why. Copied from
        # http://stackoverflow.com/a/4896288/100904
        kwargs['close_fds'] = 'posix' in sys.builtin_module_names

        self.proc = subprocess.Popen(args, **kwargs)    # nosec
        self.thread = {}        # Holds the running threads
        self.future = {}        # Stores the futures indicating stream close
        self.loop = _get_current_ioloop()

        # Buffering has 2 modes. buffer_size='line' reads and writes line by line.
        # buffer_size=<number> reads in byte chunks. Define the appropriate method
        if hasattr(buffer_size, 'lower') and 'line' in buffer_size.lower():
            def _write(stream, callbacks, future, retval):
                '''Call callbacks with content from stream. On EOF mark future as done'''
                while True:
                    content = stream.readline()
                    if len(content) > 0:
                        if isinstance(content, six.text_type):
                            content = content.encode('utf-8')
                        for callback in callbacks:
                            callback(content)
                    else:
                        stream.close()
                        break
                while self.proc.poll() is None:
                    time.sleep(MILLISECOND)
                self.loop.add_callback(future.set_result, retval())
        else:
            # If the buffer size is 0 or negative, use the default buffer size to read
            if buffer_size <= 0:
                buffer_size = io.DEFAULT_BUFFER_SIZE

            def _write(stream, callbacks, future, retval):
                '''Call callbacks with content from stream. On EOF mark future as done'''
                while True:
                    content = stream.read(buffer_size)
                    size = len(content)
                    if size > 0:
                        if isinstance(content, six.text_type):
                            content = content.encode('utf-8')
                        for callback in callbacks:
                            # This may raise a ValueError: write to closed file.
                            # TODO: decide how to handle it.
                            callback(content)
                    if size < buffer_size:
                        stream.close()
                        break
                while self.proc.poll() is None:
                    time.sleep(MILLISECOND)
                self.loop.add_callback(future.set_result, retval())

        callbacks_lookup = {'stdout': stream_stdout, 'stderr': stream_stderr}
        for stream in ('stdout', 'stderr'):
            callbacks = callbacks_lookup[stream]
            # If stream_stdout or stream_stderr are not defined, construct a
            # BytesIO and return its value when the stream is closed
            if not callbacks:
                ret_stream = io.BytesIO()
                callbacks = [ret_stream.write]
                retval = ret_stream.getvalue
            else:
                retval = lambda: b''        # noqa
            # If stream_stdout or stream_stderr has 'list_*' or 'queue_*' strings, create
            # these as list / queue attributes (e.g. self.list_out, self.queue_out)
            callbacks = list(callbacks) if isinstance(callbacks, list) else [callbacks]
            for index, method in enumerate(callbacks):
                if isinstance(method, six.string_types):
                    if method.startswith('list_'):
                        if hasattr(self, method):
                            callbacks[index] = getattr(self, method).append
                        else:
                            log = []
                            setattr(self, method, log)
                            callbacks[index] = log.append
                    elif method.startswith('queue_'):
                        if hasattr(self, method):
                            callbacks[index] = getattr(self, method).put
                        else:
                            log = Queue()
                            setattr(self, method, log)
                            callbacks[index] = log.put
                    else:
                        raise ValueError('Invalid stream_%s: %s' % (stream, method))
            self.future[stream] = future = Future()
            # Thread writes from self.proc.stdout / stderr to appropriate callbacks
            self.thread[stream] = t = Thread(
                target=_write,
                args=(getattr(self.proc, stream), callbacks, future, retval))
            t.daemon = True     # Thread dies with the program
            t.start()

    def wait_for_exit(self):
        '''
        Returns futures for (stdout, stderr). To wait for the process to complete, use::

            stdout, stderr = yield proc.wait_for_exit()
        '''
        return [self.future['stdout'], self.future['stderr']]
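

# A minimal sketch of driving Subprocess outside a handler: run_sync() drives
# the coroutine to completion. 'ls -la' assumes a POSIX system.
def _example_subprocess():
    from tornado import gen

    @gen.coroutine
    def run():
        stdout, stderr = yield Subprocess(['ls', '-la']).wait_for_exit()
        raise gen.Return(stdout.decode('utf-8'))

    return IOLoop.current().run_sync(run)
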

_daemons = {}
_regex_type = type(re.compile(''))
# Python 3 needs sys.stderr.buffer.write for writing binary strings
_stderr_write = sys.stderr.buffer.write if hasattr(sys.stderr, 'buffer') else sys.stderr.write


def daemon(args, restart=1, first_line=None, stream=True, timeout=5, buffer_size='line', **kwargs):
    '''
    This is the same as :py:class:`Subprocess`, but makes a few additional checks.

    1. If we have already called :py:class:`Subprocess` with the same arguments,
       re-use the same instance.
    2. Send the process STDOUT and STDERR to this application's STDERR. This
       makes it easy to see what errors the application reports.
    3. Supports restart attempts.
    4. Checks if the first line of output matches a string / regular expression --
       ensuring that the application started properly.
    '''
    arg_str = args if isinstance(args, six.string_types) else ' '.join(args)
    try:
        key = cache_key(arg_str, kwargs)
    except (TypeError, ValueError):
        app_log.error('daemon args must be JSON serializable')
        raise
    # Send the stdout and stderr to (a) stderr AND to (b) a local queue we read
    queue = Queue(maxsize=10)
    for channel in ('stream_stdout', 'stream_stderr'):
        if channel not in kwargs:
            kwargs[channel] = []
        elif not isinstance(kwargs[channel], list):
            kwargs[channel] = [kwargs[channel]]
        if first_line:
            kwargs[channel].append(queue.put)
        if stream is True:
            kwargs[channel].append(_stderr_write)
        elif callable(stream):
            kwargs[channel].append(stream)
    # Buffer by line by default. This is required for the first_line check, not otherwise.
    kwargs['buffer_size'] = buffer_size
    # started is set if we actually call Subprocess as part of this function
    started = False

    # If the process was never started, start it
    if key not in _daemons:
        started = _daemons[key] = Subprocess(args, **kwargs)

    # Ensure that the process is running. Restart if required
    proc = _daemons[key]
    restart = int(restart)
    while proc.proc.returncode is not None and restart > 0:
        restart -= 1
        proc = started = _daemons[key] = Subprocess(args, **kwargs)
    if proc.proc.returncode is not None:
        raise RuntimeError('Error %d starting %s' % (proc.proc.returncode, arg_str))
    if started:
        app_log.info('Started: %s', arg_str)

    future = Future()
    # If the process was started, wait until it has initialized. Else just return the proc
    if first_line and started:
        if isinstance(first_line, six.string_types):
            def check(proc):
                actual = queue.get(timeout=timeout).decode('utf-8')
                if first_line not in actual:
                    raise AssertionError('%s: wrong first line: %s (no "%s")' %
                                         (arg_str, actual, first_line))
        elif isinstance(first_line, _regex_type):
            def check(proc):
                actual = queue.get(timeout=timeout).decode('utf-8')
                if not first_line.search(actual):
                    raise AssertionError('%s: wrong first line: %s' % (arg_str, actual))
        elif callable(first_line):
            check = first_line
        loop = _get_current_ioloop()

        def checker(proc):
            try:
                check(proc)
            except Exception as e:
                loop.add_callback(future.set_exception, e)
            else:
                loop.add_callback(future.set_result, proc)

        proc._check_thread = t = Thread(target=checker, args=(proc, ))
        t.daemon = True     # Thread dies with the program
        t.start()
    else:
        future.set_result(proc)
    return future
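

# A hedged sketch of daemon(): start (or re-use) a long-running process and
# resolve the future only once the first output line confirms startup. The
# command and the expected banner are illustrative assumptions.
def _example_daemon():
    from tornado import gen

    @gen.coroutine
    def run():
        proc = yield daemon(['python', '-m', 'http.server', '8001'],
                            first_line=re.compile(r'Serving HTTP'))
        raise gen.Return(proc)

    return IOLoop.current().run_sync(run)
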

def _get_current_ioloop():
    '''
    Return the current IOLoop. But if we're not already in an IOLoop, return an
    object that mimics add_callback() by running the method immediately.
    This allows daemon() to be run without Tornado / asyncio.
    '''
    loop = IOLoop.current(instance=False)
    if loop is None:
        loop = AttrDict(add_callback=lambda fn, *args, **kwargs: fn(*args, **kwargs))
    return loop


def get_store(type, **kwargs):
    '''Return a KeyStore subclass instance for the given type, passing kwargs to its constructor'''
    if type == 'memory':
        return KeyStore(**kwargs)
    elif type == 'sqlite':
        return SQLiteStore(**kwargs)
    elif type == 'json':
        return JSONStore(**kwargs)
    elif type == 'redis':
        return RedisStore(**kwargs)
    elif type == 'hdf5':
        return HDF5Store(**kwargs)
    else:
        raise NotImplementedError('Store type: %s not implemented' % type)
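

# A small usage sketch for get_store(): pick a persistent store by type name.
# store.json and the 15-second flush interval are illustrative assumptions.
def _example_get_store():
    store = get_store('json', path='store.json', flush=15)
    store.dump('user', {'name': 'x'})   # Buffered write
    store.flush()                       # Persisted to store.json
    return store.load('user')
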

class KeyStore(object):
    '''
    Base class for persistent dictionaries. (But KeyStore itself is not persistent.)

    >>> store = KeyStore()
    >>> value = store.load(key, None)   # Load a value. It's like dict.get()
    >>> store.dump(key, value)          # Save a value, like store[key] = value (doesn't flush)
    >>> store.flush()                   # Saves to disk
    >>> store.close()                   # Close the store

    You can initialize a KeyStore with a ``flush=`` parameter. The store is
    flushed to disk via ``store.flush()`` every ``flush`` seconds.

    If a ``purge=`` is provided, the data is purged of missing values every
    ``purge`` seconds. You can provide a custom ``purge_keys=`` function that
    returns an iterator of keys to delete, if any.

    When the program exits, ``.close()`` is automatically called.
    '''

    def __init__(self, flush=None, purge=None, purge_keys=None, **kwargs):
        '''Initialise the KeyStore'''
        self.store = {}
        if callable(purge_keys):
            self.purge_keys = purge_keys
        elif purge_keys is not None:
            app_log.error(
                'KeyStore: purge_keys=%r invalid. Must be function(dict)',
                purge_keys)
        # Periodically flush and purge buffers
        if flush is not None:
            PeriodicCallback(self.flush, callback_time=flush * 1000).start()
        if purge is not None:
            PeriodicCallback(self.purge, callback_time=purge * 1000).start()
        # Call close() when Python gracefully exits
        atexit.register(self.close)

    def keys(self):
        '''Return all keys in the store'''
        return self.store.keys()

    def load(self, key, default=None):
        '''Same as store.get(), but called "load" to indicate persistence'''
        key = self._escape(key)
        return self.store.get(key, {} if default is None else default)

    def dump(self, key, value):
        '''Same as store[key] = value'''
        key = self._escape(key)
        self.store[key] = value

    def _escape(self, key):
        '''Converts key into a unicode string (interpreting byte-string keys as UTF-8)'''
        if isinstance(key, six.binary_type):
            return six.text_type(key, encoding='utf-8')
        return six.text_type(key)

    @staticmethod
    def purge_keys(data):
        return [key for key, val in data.items() if val is None]

    def flush(self):
        '''Write to disk'''
        pass

    def purge(self):
        '''Delete empty keys and flush'''
        for key in self.purge_keys(self.store):
            try:
                del self.store[key]
            except KeyError:
                # If the key was already removed from the store, ignore
                pass
        self.flush()

    def close(self):
        '''Flush and close all open handles'''
        raise NotImplementedError()
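

# A hedged sketch of a custom purge_keys= function: KeyStore.purge() deletes
# whatever keys it yields. The staleness rule below (drop falsy values) is an
# illustrative assumption, as are the file name and 60-second interval.
def _empty_keys(data):
    return [key for key, val in data.items() if not val]


def _example_purge_keys():
    return JSONStore('sessions.json', purge=60, purge_keys=_empty_keys)
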

class RedisStore(KeyStore):
    '''
    A KeyStore that stores data in a Redis database. Typical usage::

        >>> store = RedisStore('localhost:6379:1:password=x:...')   # host:port:db:params
        >>> value = store.load(key)
        >>> store.dump(key, value)

    The path in the constructor contains parameters separated by colon (:):

    - ``host``: the Redis server location (default: localhost)
    - ``port``: the Redis server port (default: 6379)
    - ``db``: the Redis server DB number (default: 0)
    - zero or more ``key=value`` parameters passed to StrictRedis (e.g. password=abc)

    Values are encoded as JSON using gramex.config.CustomJSONEncoder (thus
    handling datetime.) Keys are JSON encoded.
    '''

    def __init__(self, path=None, *args, **kwargs):
        super(RedisStore, self).__init__(*args, **kwargs)
        from redis import StrictRedis
        host, port, db, redis_kwargs = 'localhost', 6379, 0, {}
        if isinstance(path, six.string_types):
            parts = path.split(':')
            if len(parts):
                host = parts.pop(0)
            if len(parts):
                port = int(parts.pop(0))
            if len(parts):
                db = int(parts.pop(0))
            # Remaining parts are key=value pairs. Split only on the first =
            redis_kwargs = dict(part.split('=', 1) for part in parts)
        redis_kwargs['decode_responses'] = True
        redis_kwargs.setdefault('encoding', 'utf-8')
        self.store = StrictRedis(host=host, port=port, db=db, **redis_kwargs)

    def load(self, key, default=None):
        result = self.store.get(key)
        if result is None:
            return default
        try:
            return json.loads(
                result, object_pairs_hook=AttrDict, cls=CustomJSONDecoder)
        except ValueError:
            app_log.error('RedisStore("%s").load("%s") is not JSON ("%r...")',
                          self.store, key, result)
            return default

    def dump(self, key, value):
        if value is None:
            self.store.delete(key)
        else:
            value = json.dumps(
                value,
                ensure_ascii=True,
                separators=(',', ':'),
                cls=CustomJSONEncoder)
            self.store.set(key, value)

    def close(self):
        pass

    def purge(self):
        app_log.debug('Purging %s', self.store)
        # TODO: optimize item retrieval
        items = {key: self.load(key, None) for key in self.store.keys()}
        for key in self.purge_keys(items):
            self.store.delete(key)


class SQLiteStore(KeyStore):
    '''
    A KeyStore that stores data in a SQLite file. Typical usage::

        >>> store = SQLiteStore('file.db', table='store')
        >>> value = store.load(key)
        >>> store.dump(key, value)

    Values are encoded as JSON using gramex.config.CustomJSONEncoder (thus
    handling datetime.) Keys are JSON encoded.
    '''

    def __init__(self, path, table='store', *args, **kwargs):
        super(SQLiteStore, self).__init__(*args, **kwargs)
        self.path = _create_path(path)
        from sqlitedict import SqliteDict
        self.store = SqliteDict(
            self.path, tablename=table, autocommit=True,
            encode=lambda v: json.dumps(v, separators=(',', ':'), ensure_ascii=True,
                                        cls=CustomJSONEncoder),
            decode=lambda v: json.loads(v, object_pairs_hook=AttrDict, cls=CustomJSONDecoder),
        )

    def close(self):
        self.store.close()

    def flush(self):
        super(SQLiteStore, self).flush()
        self.store.commit()

    def keys(self):
        # Keys need to be escaped
        return (self._escape(key) for key in self.store.keys())

    def purge(self):
        app_log.debug('Purging %s', self.path)
        super(SQLiteStore, self).purge()


class HDF5Store(KeyStore):
    '''
    A KeyStore that stores data in a HDF5 file. Typical usage::

        >>> store = HDF5Store('file.h5', flush=15)
        >>> value = store.load(key)
        >>> store.dump(key, value)

    Internally, it uses HDF5 groups to store data. Values are encoded as JSON
    using gramex.config.CustomJSONEncoder (thus handling datetime.) Keys are JSON
    encoded, and '/' is escaped as well (since HDF5 groups treat / as subgroups.)
    '''

    def __init__(self, path, *args, **kwargs):
        super(HDF5Store, self).__init__(*args, **kwargs)
        self.path = _create_path(path)
        self.changed = False
        import h5py
        # h5py.File fails with OSError: Unable to create file (unable to open file: name =
        # '.meta.h5', errno = 17, error message = 'File exists', flags = 15, o_flags = 502)
        # TODO: identify why this happens and resolve it.
        self.store = h5py.File(self.path, 'a')

    def load(self, key, default=None):
        # Keys cannot contain / in the HDF5 store. _escape() replaces it with a tab
        key = self._escape(key)
        result = self.store.get(key, None)
        if result is None:
            return default
        try:
            return json.loads(
                result.value,
                object_pairs_hook=AttrDict,
                cls=CustomJSONDecoder)
        except ValueError:
            app_log.error('HDF5Store("%s").load("%s") is not JSON ("%r...")',
                          self.path, key, result.value)
            return default

    def dump(self, key, value):
        key = self._escape(key)
        if self.store.get(key) != value:
            if key in self.store:
                del self.store[key]
            self.store[key] = json.dumps(
                value,
                ensure_ascii=True,
                separators=(',', ':'),
                cls=CustomJSONEncoder)
            self.changed = True

    def _escape(self, key):
        '''
        Converts key into a unicode string (interpreting byte-string keys as UTF-8).
        HDF5 does not accept / in key names. Replace those with tabs.
        '''
        if isinstance(key, six.binary_type):
            key = six.text_type(key, encoding='utf-8')
        else:
            key = six.text_type(key)
        return key.replace('/', '\t')

    def keys(self):
        # Keys cannot contain / in the HDF5 store. Unescape the tabs back into /
        return (key.replace('\t', '/') for key in self.store.keys())

    def flush(self):
        super(HDF5Store, self).flush()
        if self.changed:
            app_log.debug('Flushing %s', self.path)
            self.store.flush()
            self.changed = False

    def purge(self):
        '''
        Load all keys into self.store. Delete what's required. Save.
        '''
        self.flush()
        changed = False
        items = {
            key: json.loads(
                val.value, object_pairs_hook=AttrDict, cls=CustomJSONDecoder)
            for key, val in self.store.items()
        }
        for key in self.purge_keys(items):
            del self.store[key]
            changed = True
        if changed:
            app_log.debug('Purging %s', self.path)
            self.store.flush()

    def close(self):
        try:
            self.store.close()
        # h5py.h5f.get_obj_ids often raises a ValueError: Not a file id.
        # This is presumably when the file handle has already been closed. Log & ignore.
        except ValueError:
            app_log.debug('HDF5Store("%s").close() error ignored', self.path)


class JSONStore(KeyStore):
    '''
    A KeyStore that stores data in a JSON file. Typical usage::

        >>> store = JSONStore('file.json', flush=15)
        >>> value = store.load(key)
        >>> store.dump(key, value)

    This is less efficient than HDF5Store for large data, but is human-readable.
    It also cannot support multiple instances: only one JSONStore instance
    is permitted per file.
    '''

    def __init__(self, path, *args, **kwargs):
        super(JSONStore, self).__init__(*args, **kwargs)
        self.path = _create_path(path)
        self.store = self._read_json()
        self.changed = False
        self.update = {}        # key-values added since the last flush

    def _read_json(self):
        try:
            with io.open(self.path) as handle:      # noqa: no encoding for json
                return json.load(handle, cls=CustomJSONDecoder)
        except (IOError, ValueError):
            return {}

    def _write_json(self, data):
        json_value = json.dumps(
            data,
            ensure_ascii=True,
            separators=(',', ':'),
            cls=CustomJSONEncoder)
        with io.open(self.path, 'w') as handle:     # noqa: no encoding for json
            handle.write(json_value)

    def dump(self, key, value):
        '''Same as store[key] = value'''
        key = self._escape(key)
        if self.store.get(key) != value:
            self.store[key] = value
            self.update[key] = value
            self.changed = True

    def flush(self):
        super(JSONStore, self).flush()
        if self.changed:
            app_log.debug('Flushing %s', self.path)
            store = self._read_json()
            store.update(self.update)
            self._write_json(store)
            self.store = store
            self.update = {}
            self.changed = False

    def purge(self):
        '''
        Load all keys into self.store. Delete what's required. Save.
        '''
        self.flush()
        changed = False
        for key in self.purge_keys(self.store):
            del self.store[key]
            changed = True
        if changed:
            app_log.debug('Purging %s', self.path)
            self._write_json(self.store)

    def close(self):
        try:
            self.flush()
        # This has happened when the directory was deleted. Log & ignore.
        except OSError:
            app_log.error('Cannot flush %s', self.path)


def _create_path(path):
    # Ensure that the path's directory exists
    path = os.path.abspath(path)
    folder = os.path.dirname(path)
    if not os.path.exists(folder):
        os.makedirs(folder)
    return path


def sizeof(obj):
    '''Returns the approximate size of obj in bytes, recursing into dicts, sets and lists'''
    if isinstance(obj, dict):
        return sys.getsizeof(obj) + sum(sizeof(k) + sizeof(v) for k, v in obj.items())
    elif isinstance(obj, (set, list)):
        return sys.getsizeof(obj) + sum(sizeof(v) for v in obj)
    return sys.getsizeof(obj)
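

# A quick sketch contrasting sizeof() with sys.getsizeof(): the latter counts
# only the outer container, while sizeof() recurses into dicts, sets and lists.
def _example_sizeof():
    data = {'rows': list(range(100)), 'name': 'x' * 1000}
    return sizeof(data), sys.getsizeof(data)    # Recursive size is much larger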