'''
This module is a service registry for ``gramex.yaml``. Each key must have a
corresponding function in this file.

For example, if ``gramex.yaml`` contains this section::

    log:
        version: 1

... then :func:`log` is called as ``log({"version": 1})``. If no such function
exists, a warning is raised.
'''

import io
import re
import os
import sys
import json
import atexit
import signal
import socket
import logging
import datetime
import posixpath
import mimetypes
import threading
import webbrowser
import tornado.web
import gramex.data
import gramex.cache
import gramex.license
import logging.config
import concurrent.futures
import six.moves.urllib.parse as urlparse
from copy import deepcopy
from six import text_type, string_types
from tornado.template import Template
from orderedattrdict import AttrDict
from gramex import debug, shutdown, __version__
from gramex.transforms import build_transform
from gramex.config import locate, app_log, ioloop_running, app_log_extra, merge, walk
from gramex.cache import urlfetch, cache_key
from gramex.http import OK, NOT_MODIFIED
from . import urlcache
from .ttlcache import MAXTTL
from .emailer import SMTPMailer
from .sms import AmazonSNS, Exotel, Twilio

# Service information, available as gramex.service after gramex.init() is called
info = AttrDict(
    app=None,
    schedule=AttrDict(),
    alert=AttrDict(),
    cache=AttrDict(),
    # Initialise with a single worker by default. threadpool.workers overrides this
    threadpool=concurrent.futures.ThreadPoolExecutor(1),
    eventlog=AttrDict(),
    email=AttrDict(),
    sms=AttrDict(),
    _md=None,
    _main_ioloop=None,
)
_cache, _tmpl_cache = AttrDict(), AttrDict()
atexit.register(info.threadpool.shutdown)


def version(conf):
    '''Check if config version is supported. Currently, only 1.0 is supported'''
    if conf != 1.0:
        raise NotImplementedError('version: %s is not supported. Only 1.0' % conf)


def log(conf):
    '''Set up logging using Python's standard logging.config.dictConfig()'''
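    # Illustrative log: section (assumed), following the standard logging.config.dictConfig()
    # schema -- a gramex.yaml sketch, not the only supported form:
    #   log:
    #     version: 1
    #     handlers:
    #       logfile: {class: logging.FileHandler, filename: logs/gramex.log}
    #     root: {handlers: [logfile]}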

    # Create the folders for any log filename mentioned by an active handler
    active_handlers = set(conf.get('root', {}).get('handlers', []))
    for logger in conf.get('loggers', {}).values():
        active_handlers |= set(logger.get('handlers', []))
    for handler, handler_conf in conf.get('handlers', {}).items():
        if handler in active_handlers:
            filename = handler_conf.get('filename', None)
            if filename is not None:
                folder = os.path.dirname(os.path.abspath(handler_conf.filename))
                if not os.path.exists(folder):
                    try:
                        os.makedirs(folder)
                    except OSError:
                        app_log.exception('log: %s: cannot create folder %s', handler, folder)
    try:
        logging.config.dictConfig(conf)
    except (ValueError, TypeError, AttributeError, ImportError):
        app_log.exception('Error in log: configuration')


class GramexApp(tornado.web.Application):
    def log_request(self, handler):
        # BaseHandler defines a custom log format. If that's present, use it.
        if hasattr(handler, 'log_request'):
            handler.log_request()
        # Log the request with the handler name at the end.
        status = handler.get_status()
        if status < 400:        # noqa: < 400 is any successful request
            log_method = gramex.cache.app_log.info
        elif status < 500:      # noqa: 400-499 is a user error
            log_method = gramex.cache.app_log.warning
        else:                   # 500+ is a server error
            log_method = gramex.cache.app_log.error
        request_time = 1000.0 * handler.request.request_time()
        handler_name = getattr(handler, 'name', handler.__class__.__name__)
        log_method("%d %s %.2fms %s", handler.get_status(),
                   handler._request_summary(), request_time, handler_name)

    def clear_handlers(self):
        '''
        Clear all handlers in the application.
        (Tornado does not provide a direct way of doing this.)
        '''
        # Up to Tornado 4.4, the handlers attribute stored the handlers
        if hasattr(self, 'handlers'):
            del self.handlers[:]
            self.named_handlers.clear()

        # From Tornado 4.5, there are routers that hold the rules
        else:
            del self.default_router.rules[:]
            del self.wildcard_router.rules[:]


def app(conf):
    '''Set up tornado.web.Application() -- only if the ioloop hasn't started'''
    import tornado.ioloop

    ioloop = tornado.ioloop.IOLoop.current()
    if ioloop_running(ioloop):
        app_log.warning('Ignoring app config change when running')
    else:
        info.app = GramexApp(**conf.settings)
        try:
            info.app.listen(**conf.listen)
        except socket.error as e:
            port_used_codes = dict(windows=10048, linux=98)
            if e.errno not in port_used_codes.values():
                raise
            logging.error('Port %d is busy. Use --listen.port= for a different port',
                          conf.listen.port)
            sys.exit(1)

        def callback():
            '''Called after all services are started. Opens browser if required'''
            if ioloop_running(ioloop):
                return

            # If enterprise version is installed, user must accept license
            try:
                import gramexenterprise     # noqa
                gramex.license.accept()
            except ImportError:
                pass

            app_log.info('Listening on port %d', conf.listen.port)
            app_log_extra['port'] = conf.listen.port

            # browser: True opens the application home page on localhost.
            # browser: url opens the application to a specific URL
            url = 'http://127.0.0.1:%d/' % conf.listen.port
            if conf.browser:
                if isinstance(conf.browser, str):
                    url = urlparse.urljoin(url, conf.browser)
                try:
                    browser = webbrowser.get()
                    app_log.info('Opening %s in %s browser', url, browser.__class__.__name__)
                    browser.open(url)
                except webbrowser.Error:
                    app_log.info('Unable to open browser')
            else:
                app_log.info('<Ctrl-B> opens the browser. <Ctrl-D> starts the debugger.')

            # Ensure that we call shutdown() on Ctrl-C.
            # On Windows, Tornado does not exit on Ctrl-C. This also fixes that.
            # When Ctrl-C is pressed, signal_handler() sets exit to [True].
            # check_exit() periodically watches and calls shutdown().
            # But signal handlers can only be set in the main thread.
            # So ignore if we're not in the main thread (e.g. for nosetests, Windows service)
            #
            # Note: The PeriodicCallback takes up a small amount of CPU time.
            # Note: getch() doesn't handle the keyboard buffer queue.
            # Note: There is no guarantee that shutdown() will be called.
            if isinstance(threading.current_thread(), threading._MainThread):
                exit = [False]

                def check_exit():
                    if exit[0] is True:
                        shutdown()
                    # If Ctrl-D is pressed, run the Python debugger
                    char = debug.getch()
                    if char == b'\x04':
                        import ipdb as pdb      # noqa
                        pdb.set_trace()         # noqa
                    # If Ctrl-B is pressed, start the browser
                    if char == b'\x02':
                        browser = webbrowser.get()
                        browser.open(url)

                def signal_handler(signum, frame):
                    exit[0] = True

                try:
                    signal.signal(signal.SIGINT, signal_handler)
                except ValueError:
                    # When running as a Windows Service (winservice.py), python
                    # itself is on a thread, I think. So ignore the
                    # ValueError: signal only works in main thread.
                    pass
                else:
                    tornado.ioloop.PeriodicCallback(check_exit, callback_time=500).start()

            info._main_ioloop = ioloop
            ioloop.start()

        return callback


def _stop_all_tasks(tasks):
    for name, task in tasks.items():
        task.stop()
    tasks.clear()


def schedule(conf):
    '''Set up the Gramex PeriodicCallback scheduler'''
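    # Illustrative (assumed) gramex.yaml usage -- each named entry becomes a scheduler.Task:
    #   schedule:
    #     cleanup:
    #       function: myapp.cleanup     # hypothetical function to run
    #       hours: 0
    #       minutes: 30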

    # Create tasks running on ioloop for the given schedule, store it in info.schedule
    from . import scheduler
    _stop_all_tasks(info.schedule)
    for name, sched in conf.items():
        _key = cache_key('schedule', sched)
        if _key in _cache:
            task = info.schedule[name] = _cache[_key]
            task.call_later()
            continue
        try:
            app_log.info('Initialising schedule:%s', name)
            _cache[_key] = scheduler.Task(name, sched, info.threadpool,
                                          ioloop=info._main_ioloop)
            info.schedule[name] = _cache[_key]
        except Exception as e:
            app_log.exception(e)


def _markdown_convert(content):
    '''
    Convert Markdown content into HTML, using a standard set of extensions.
    '''
    # Cache the markdown converter
    if '_markdown' not in info:
        import markdown
        info['_markdown'] = markdown.Markdown(extensions=[
            'markdown.extensions.extra',
            'markdown.extensions.meta',
            'markdown.extensions.codehilite',
            'markdown.extensions.smarty',
            'markdown.extensions.sane_lists',
            'markdown.extensions.fenced_code',
            'markdown.extensions.toc',
        ], output_format='html5')
    return info['_markdown'].convert(content)


def _tmpl(template_string):
    '''Compile a Tornado template string. Cache the result'''
    if template_string not in _tmpl_cache:
        _tmpl_cache[template_string] = Template(template_string)
    return _tmpl_cache[template_string]


def create_alert(name, alert):
    '''Generate the function to be run by alert() using the alert configuration'''
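    # Illustrative (assumed) alert configuration, using keys this function validates below:
    #   alert:
    #     daily-report:
    #       to: user@example.org
    #       subject: Daily report
    #       markdown: '**All systems normal**'
    #       service: default-email      # optional; defaults to the first email: service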


    # Configure email service
    if alert.get('service', None) is None:
        if len(info.email) > 0:
            alert['service'] = list(info.email.keys())[0]
            app_log.warning('alert: %s: using first email service: %s', name, alert['service'])
        else:
            app_log.error('alert: %s: define an email: service to use', name)
            return
    service = alert['service']
    mailer = info.email.get(service, None)
    if mailer is None:
        app_log.error('alert: %s: undefined email service: %s', name, service)
        return

    # - Warn if to, cc, bcc exists and is not a string or list of strings. Ignore incorrect
    # - if to: [1, 'user@example.org'], then
    #   - log a warning about the 1. Drop the 1. to: becomes ['user@example.org']

    # Error if to, cc, bcc are all missing, return None
    if not any(key in alert for key in ['to', 'cc', 'bcc']):
        app_log.error('alert: %s: missing to/cc/bcc', name)
        return
    # Ensure that config has the right type (str, dict, list)
    contentfields = ['body', 'html', 'bodyfile', 'htmlfile', 'markdown', 'markdownfile']
    addr_fields = ['to', 'cc', 'bcc', 'reply_to', 'on_behalf_of', 'from']
    for key in ['subject'] + addr_fields + contentfields:
        if not isinstance(alert.get(key, ''), string_types + (list, )):
            app_log.error('alert: %s.%s: %r must be a list or str', name, key, alert[key])
            return
    if not isinstance(alert.get('images', {}), dict):
        app_log.error('alert: %s.images: %r is not a dict', name, alert['images'])
        return
    if not isinstance(alert.get('attachments', []), list):
        app_log.error('alert: %s.attachments: %r is not a list', name, alert['attachments'])
        return

    # Warn if subject is missing
    if 'subject' not in alert:
        app_log.warning('alert: %s: missing subject', name)

    # Warn if body, html, bodyfile, htmlfile keys are missing
    if not any(key in alert for key in contentfields):
        app_log.warning('alert: %s: missing body/html/bodyfile/htmlfile/...', name)

    # Pre-compile data.
    # - `data: {key: [...]}` -- loads data in-place
    # - `data: {key: {url: file}}` -- loads from a file
    # - `data: {key: {url: sqlalchemy-url, table: table}}` -- loads from a database
    # - `data: file` -- same as `data: {data: {url: file}}`
    # - `data: {key: file}` -- same as `data: {key: {url: file}}`
    # - `data: [...]` -- same as `data: {data: [...]}`
    datasets = {}
    if 'data' in alert:
        if isinstance(alert['data'], string_types):
            datasets = {'data': {'url': alert['data']}}
        elif isinstance(alert['data'], list):
            datasets = {'data': alert['data']}
        elif isinstance(alert['data'], dict):
            for key, dataset in alert['data'].items():
                if isinstance(dataset, string_types):
                    datasets[key] = {'url': dataset}
                elif isinstance(dataset, list) or 'url' in dataset:
                    datasets[key] = dataset
                else:
                    app_log.error('alert: %s.data: %s is missing url:', name, key)
        else:
            app_log.error('alert: %s.data: must be a data file or dict. Not %s',
                          name, repr(alert['data']))

    if 'each' in alert and alert['each'] not in datasets:
        app_log.error('alert: %s.each: %s is not in data:', name, alert['each'])
        return

    vars = {key: None for key in datasets}
    vars.update({'config': None, 'args': None})
    condition = build_transform(
        {'function': alert.get('condition', 'True')},
        filename='alert: %s' % name, vars=vars, iter=False)

    alert_logger = logging.getLogger('gramex.alert')

    def load_datasets(data, each):
        '''
        Modify data by loading datasets and filtering by the condition.
        Modify each to apply the each: argument; else append a single (0, None) entry.
        '''
        for key, val in datasets.items():
            # Allow raw data in lists as-is. Treat dicts as {url: ...}
            data[key] = val if isinstance(val, list) else gramex.data.filter(**val)
        result = condition(**data)
        # Avoiding isinstance(result, pd.DataFrame) to avoid importing pandas
        if type(result).__name__ == 'DataFrame':
            data['data'] = result
        elif isinstance(result, dict):
            data.update(result)
        elif not result:
            app_log.debug('alert: %s stopped. condition = %s', name, result)
            return
        if 'each' in alert:
            each_data = data[alert['each']]
            if isinstance(each_data, dict):
                each += list(each_data.items())
            elif isinstance(each_data, list):
                each += list(enumerate(each_data))
            elif hasattr(each_data, 'iterrows'):
                each += list(each_data.iterrows())
            else:
                raise ValueError('alert: %s: each: data.%s must be dict/list/DF, not %s' % (
                    name, alert['each'], type(each_data)))
        else:
            each.append((0, None))

    def create_mail(data):
        '''
        Return kwargs that can be passed to a mailer.mail
        '''
        mail = {}
        for key in ['bodyfile', 'htmlfile', 'markdownfile']:
            target = key.replace('file', '')
            if key in alert and target not in alert:
                path = _tmpl(alert[key]).generate(**data).decode('utf-8')
                tmpl = gramex.cache.open(path, 'template')
                mail[target] = tmpl.generate(**data).decode('utf-8')
        for key in addr_fields + ['subject', 'body', 'html', 'markdown']:
            if key not in alert:
                continue
            if isinstance(alert[key], list):
                mail[key] = [_tmpl(v).generate(**data).decode('utf-8') for v in alert[key]]
            else:
                mail[key] = _tmpl(alert[key]).generate(**data).decode('utf-8')
        headers = {}
        # user: {id: ...} creates an X-Gramex-User header to mimic the user
        if 'user' in alert:
            user = deepcopy(alert['user'])
            for key, val, node in walk(user):
                node[key] = _tmpl(val).generate(**data).decode('utf-8')
            user = json.dumps(user, ensure_ascii=True, separators=(',', ':'))
            headers['X-Gramex-User'] = tornado.web.create_signed_value(
                info.app.settings['cookie_secret'], 'user', user)
        if 'markdown' in mail:
            mail['html'] = _markdown_convert(mail.pop('markdown'))
        if 'images' in alert:
            mail['images'] = {}
            for cid, val in alert['images'].items():
                urlpath = _tmpl(val).generate(**data).decode('utf-8')
                urldata = urlfetch(urlpath, info=True, headers=headers)
                if urldata['content_type'].startswith('image/'):
                    mail['images'][cid] = urldata['name']
                else:
                    with io.open(urldata['name'], 'rb') as temp_file:
                        bytestoread = 80
                        first_line = temp_file.read(bytestoread)
                        # TODO: let admin know that the image was not processed
                        app_log.error('alert: %s: %s: %d (%s) not an image: %s\n%r', name,
                                      cid, urldata['r'].status_code, urldata['content_type'],
                                      urlpath, first_line)
        if 'attachments' in alert:
            mail['attachments'] = [
                urlfetch(_tmpl(v).generate(**data).decode('utf-8'), headers=headers)
                for v in alert['attachments']
            ]
        return mail

    def run_alert(callback=None, args=None):
        '''
        Runs the configured alert. If a callback is specified, calls the
        callback with all email arguments. Else sends the email.
        If args= is specified, add it as data['args'].
        '''
        app_log.info('alert: %s running', name)
        data, each, fail = {'config': alert, 'args': {} if args is None else args}, [], []
        try:
            load_datasets(data, each)
        except Exception as e:
            app_log.exception('alert: %s data processing failed', name)
            fail.append({'error': e})

        retval = []
        for index, row in each:
            data['index'], data['row'], data['config'] = index, row, alert
            try:
                retval.append(AttrDict(index=index, row=row, mail=create_mail(data)))
            except Exception as e:
                app_log.exception('alert: %s[%s] templating (row=%r)', name, index, row)
                fail.append({'index': index, 'row': row, 'error': e})

        callback = mailer.mail if not callable(callback) else callback
        done = []
        for v in retval:
            try:
                callback(**v.mail)
            except Exception as e:
                fail.append({'index': v.index, 'row': v.row, 'mail': v.mail, 'error': e})
                app_log.exception('alert: %s[%s] delivery (row=%r)', name, v.index, v.row)
            else:
                done.append(v)
                event = {
                    'alert': name, 'service': service, 'from': mailer.email or '',
                    'to': '', 'cc': '', 'bcc': '', 'subject': '',
                    'datetime': datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
                }
                event.update({k: v for k, v in v.mail.items() if k in event})
                event['attachments'] = ', '.join(v.mail.get('attachments', []))
                alert_logger.info(event)

        # Run notifications
        args = {'done': done, 'fail': fail}
        for notification_name in alert.get('notify', []):
            notify = info.alert.get(notification_name)
            if notify is not None:
                notify.run(callback=callback, args=args)
            else:
                app_log.error('alert: %s.notify: alert %s not defined', name, notification_name)
        return args

    return run_alert


def alert(conf):
    from . import scheduler
    _stop_all_tasks(info.alert)
    schedule_keys = 'minutes hours dates months weekdays years startup utc'.split()

    for name, alert in conf.items():
        _key = cache_key('alert', alert)
        if _key in _cache:
            task = info.alert[name] = _cache[_key]
            task.call_later()
            continue
        app_log.info('Initialising alert: %s', name)
        schedule = {key: alert[key] for key in schedule_keys if key in alert}
        if 'thread' in alert:
            schedule['thread'] = alert['thread']
        schedule['function'] = create_alert(name, alert)
        if schedule['function'] is not None:
            try:
                _cache[_key] = scheduler.Task(name, schedule, info.threadpool,
                                              ioloop=info._main_ioloop)
                info.alert[name] = _cache[_key]
            except Exception:
                app_log.exception('Failed to initialize alert: %s', name)


def threadpool(conf):
    '''Set up a global threadpool executor'''
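    # Illustrative gramex.yaml usage (assumed):
    #   threadpool:
    #     workers: 4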

    # By default, use a single worker. If a different value is specified, use it
    workers = 1
    if conf and hasattr(conf, 'get'):
        workers = conf.get('workers', workers)
    info.threadpool = concurrent.futures.ThreadPoolExecutor(workers)
    atexit.register(info.threadpool.shutdown)


def handlers(conf):
    '''
    The handlers: config is used by the url: handlers to set up the defaults.
    No explicit configuration is required.
    '''
    pass


def _sort_url_patterns(entry):
    '''
    Sort URL patterns based on their specificity. This allows patterns to
    over-ride each other in a CSS-like way.
    '''
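    # For example, a spec with pattern '/project/data' sorts as (0, 2, 0) while one with
    # pattern '/.*' sorts as (0, 1, -1), so the deeper, wildcard-free pattern wins.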

    name, spec = entry
    pattern = spec.pattern
    # URLs are resolved in this order:
    return (
        spec.get('priority', 0),    # by explicit priority: parameter
        pattern.count('/'),         # by path depth (deeper paths are higher)
        -(pattern.count('*') +
          pattern.count('+')),      # by wildcards (wildcards get lower priority)
    )
    # TODO: patterns like (js/.*|css/.*|img/.*) will have path depth of 3.
    # But this should really count only as 1.


def _url_normalize(pattern):
    '''Remove double slashes, ../, ./ etc in the URL path. Remove URL fragment'''
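    # For example: _url_normalize('/a//b/../c/#frag') returns '/a/c/' (fragment dropped)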

    url = urlparse.urlsplit(pattern)
    path = posixpath.normpath(url.path)
    if url.path.endswith('/') and not path.endswith('/'):
        path += '/'
    return urlparse.urlunsplit((url.scheme, url.netloc, path, url.query, ''))


def _get_cache_key(conf, name):
    '''
    Parse the cache.key parameter. Return a function that takes the request and
    returns the cache key value.

    The cache key is a string or a list of strings. The strings can be:

    - ``request.attr`` => ``request.attr`` can be any request attribute, as str
    - ``header.key`` => ``request.headers[key]``
    - ``cookies.key`` => ``request.cookies[key].value``
    - ``args.key`` => ``handler.args[key]`` joined with a comma
    - ``user.key`` => ``handler.current_user[key]`` as str

    Invalid key strings are ignored with a warning. If all key strings are
    invalid, the default cache.key of ``request.uri`` is used.
    '''
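    # For example (illustrative), key: [request.method, user.id] compiles to roughly:
    #   def cache_key(handler):
    #       request = handler.request
    #       return (u(getattr(request, 'method', missing)),
    #               u(handler.current_user.get('id', missing)) if handler.current_user else missing)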

    default_key = 'request.uri'
    keys = conf.get('key', default_key)
    if not isinstance(keys, list):
        keys = [keys]
    key_getters = []
    for key in keys:
        parts = key.split('.', 2)
        if len(parts) < 2:
            app_log.warning('url: %s: ignoring invalid cache key %s', name, key)
            continue
        # convert second part into a Python string representation
        val = repr(parts[1])
        if parts[0] == 'request':
            key_getters.append('u(getattr(request, %s, missing))' % val)
        elif parts[0].startswith('header'):
            key_getters.append('request.headers.get(%s, missing)' % val)
        elif parts[0].startswith('cookie'):
            key_getters.append(
                'request.cookies[%s].value if %s in request.cookies else missing' % (val, val))
        elif parts[0].startswith('user'):
            key_getters.append('u(handler.current_user.get(%s, missing)) '
                               'if handler.current_user else missing' % val)
        elif parts[0].startswith('arg'):
            key_getters.append('argsep.join(handler.args.get(%s, [missing]))' % val)
        else:
            app_log.warning('url: %s: ignoring invalid cache key %s', name, key)
    # If none of the keys are valid, use the default request key
    if not len(key_getters):
        key_getters = [default_key]

    method = 'def cache_key(handler):\n'
    method += '\trequest = handler.request\n'
    method += '\treturn (%s)' % ', '.join(key_getters)
    context = {
        'missing': '~',
        'argsep': ', ',     # join args using comma
        'u': text_type      # convert to unicode
    }
    # The code is constructed entirely by this function. Using exec is safe
    exec(method, context)   # nosec
    return context['cache_key']


def _cache_generator(conf, name):
    '''
    The ``url:`` section of ``gramex.yaml`` can specify a ``cache:`` section. For
    example::

        url:
            home:
                pattern: /
                handler: ...
                cache:
                    key: request.uri
                    store: memory
                    expiry:
                        duration: 1 minute

    This function takes the ``cache`` section of the configuration and returns a
    "cache" function. This function accepts a RequestHandler and returns a
    ``CacheFile`` instance.

    Here's a typical usage::

        cache_method = _cache_generator(conf.cache)     # one-time initialisation
        cache_file = cache_method(handler)              # used inside a handler

    The cache_file instance exposes the following interface::

        cache_file.get()            # returns None
        cache_file.write('abc')
        cache_file.write('def')
        cache_file.close()
        cache_file.get()            # returns 'abcdef'
    '''
    # cache: can be True (to use default settings) or False (to disable cache)
    if conf is True:
        conf = {}
    elif conf is False:
        return None

    # Get the store. Defaults to the first store in the cache: section
    default_store = list(info.cache.keys())[0] if len(info.cache) > 0 else None
    store_name = conf.get('store', default_store)
    if store_name not in info.cache:
        app_log.warning('url: %s: store %s missing', name, store_name)
    store = info.cache.get(store_name)

    url_cache_key = _get_cache_key(conf, name)
    cachefile_class = urlcache.get_cachefile(store)
    cache_expiry = conf.get('expiry', {})
    cache_statuses = conf.get('status', [OK, NOT_MODIFIED])
    cache_expiry_duration = cache_expiry.get('duration', MAXTTL)

    # This method will be added to the handler class as "cache", and called as
    # self.cache()
    def get_cachefile(handler):
        return cachefile_class(key=url_cache_key(handler), store=store,
                               handler=handler, expire=cache_expiry_duration,
                               statuses=set(cache_statuses))

    return get_cachefile


def url(conf):
    '''Set up the tornado web app URL handlers'''
    handlers = []
    # Sort the handlers in descending order of priority
    specs = sorted(conf.items(), key=_sort_url_patterns, reverse=True)
    for name, spec in specs:
        _key = cache_key('url', spec)
        if _key in _cache:
            handlers.append(_cache[_key])
            continue
        if 'handler' not in spec:
            app_log.error('url: %s: no handler specified', name)
            continue
        app_log.debug('url: %s (%s) %s', name, spec.handler, spec.get('priority', ''))
        urlspec = AttrDict(spec)
        handler = locate(spec.handler, modules=['gramex.handlers'])
        if handler is None:
            app_log.error('url: %s: ignoring missing handler %s', name, spec.handler)
            continue

        # Create a subclass of the handler with additional attributes.
        class_vars = {'name': name, 'conf': spec}
        # If there's a cache section, get the cache method for use by BaseHandler
        if 'cache' in urlspec:
            class_vars['cache'] = _cache_generator(urlspec['cache'], name=name)
        else:
            class_vars['cache'] = None
        # PY27 type() requires the class name to be a string, not unicode
        urlspec.handler = type(str(spec.handler), (handler, ), class_vars)

        # If there's a setup method, call it to initialize the class
        kwargs = urlspec.get('kwargs', {})
        if hasattr(handler, 'setup'):
            try:
                urlspec.handler.setup_default_kwargs()
                urlspec.handler.setup(**kwargs)
            except Exception:
                app_log.exception('url: %s: setup exception in handler %s', name, spec.handler)
                # Since we can't set up the handler, all requests must report the error instead
                class_vars['exc_info'] = sys.exc_info()
                error_handler = locate('SetupFailedHandler', modules=['gramex.handlers'])
                urlspec.handler = type(str(spec.handler), (error_handler, ), class_vars)
                urlspec.handler.setup(**kwargs)

        try:
            handler_entry = tornado.web.URLSpec(
                name=name,
                pattern=_url_normalize(urlspec.pattern),
                handler=urlspec.handler,
                kwargs=kwargs,
            )
        except re.error:
            app_log.error('url: %s: pattern: %s is invalid', name, urlspec.pattern)
            continue
        except Exception:
            app_log.exception('url: %s: invalid', name)
            continue
        _cache[_key] = handler_entry
        handlers.append(handler_entry)

    info.app.clear_handlers()
    info.app.add_handlers('.*$', handlers)


def mime(conf):
    '''Set up MIME types'''
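    # Illustrative gramex.yaml usage (assumed): each key maps a file extension to a MIME type.
    #   mime:
    #     .yml: text/yaml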

    for ext, type in conf.items():
        mimetypes.add_type(type, ext, strict=True)


def watch(conf):
    '''Set up file watchers'''
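    # Illustrative gramex.yaml usage (assumed): each watcher needs paths: and at least one
    # on_* event handler (a callable or an importable function name).
    #   watch:
    #     data-files:
    #       paths: [data/]
    #       on_modified: myapp.reload     # hypothetical handler function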

    from . import watcher

    events = {'on_modified', 'on_created', 'on_deleted', 'on_moved', 'on_any_event'}
    for name, config in conf.items():
        _key = cache_key('watch', config)
        if _key in _cache:
            watcher.watch(name, **_cache[_key])
            continue
        if 'paths' not in config:
            app_log.error('watch:%s has no "paths"', name)
            continue
        if not set(config.keys()) & events:
            app_log.error('watch:%s has no events (on_modified, ...)', name)
            continue
        if not isinstance(config['paths'], (list, set, tuple)):
            config['paths'] = [config['paths']]
        for event in events:
            if event in config:
                if not callable(config[event]):
                    config[event] = locate(config[event], modules=['gramex.transforms'])
                    if not callable(config[event]):
                        app_log.error('watch:%s.%s is not callable', name, event)
                        config[event] = lambda event: None
        _cache[_key] = config
        watcher.watch(name, **_cache[_key])


_cache_defaults = {
    'memory': {
        'size': 500000000,      # 500 MB
    },
    'disk': {
        'size': 10000000000,    # 10 GB
    }
}


def cache(conf):
    '''Set up caches'''
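    # Illustrative gramex.yaml usage (assumed): each named cache has a type (memory/disk),
    # an optional size, and optionally default: true to back gramex.cache.{open,query}.
    #   cache:
    #     memory-cache: {type: memory, size: 100000000}
    #     disk-cache: {type: disk, path: .cache-app, default: true}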

    for name, config in conf.items():
        cache_type = config['type']
        if cache_type not in _cache_defaults:
            app_log.warning('cache: %s has unknown type %s', name, config.type)
            continue
        config = merge(dict(config), _cache_defaults[cache_type], mode='setdefault')
        if cache_type == 'memory':
            info.cache[name] = urlcache.MemoryCache(
                maxsize=config['size'], getsizeof=gramex.cache.sizeof)
        elif cache_type == 'disk':
            path = config.get('path', '.cache-' + name)
            info.cache[name] = urlcache.DiskCache(
                path, size_limit=config['size'], eviction_policy='least-recently-stored')
            atexit.register(info.cache[name].close)
        # if default: true, make this the default cache for gramex.cache.{open,query}
        if config.get('default'):
            for key in ['_OPEN_CACHE', '_QUERY_CACHE']:
                val = gramex.cache.set_cache(info.cache[name], getattr(gramex.cache, key))
                setattr(gramex.cache, key, val)


def eventlog(conf):
    '''Set up the application event logger'''
    if not conf.path:
        return

    import time
    import sqlite3

    folder = os.path.dirname(os.path.abspath(conf.path))
    if not os.path.exists(folder):
        os.makedirs(folder)

    def query(q, *args, **kwargs):
        conn = sqlite3.connect(conf.path, check_same_thread=False)
        conn.row_factory = sqlite3.Row
        result = list(conn.execute(q, *args, **kwargs))
        conn.commit()
        conn.close()
        return result

    def add(event_name, data):
        '''Write a message into the application event log'''
        data = json.dumps(data, ensure_ascii=True, separators=(',', ':'))
        query('INSERT INTO events VALUES (?, ?, ?)', [time.time(), event_name, data])

    def shutdown():
        add('shutdown', {'version': __version__, 'pid': os.getpid()})
        # Don't close the connection here. gramex.gramex_update() runs in a thread. If we start
        # and stop gramex quickly, allow gramex_update to add its entry too.
        # conn.close()

    info.eventlog.query = query
    info.eventlog.add = add
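    # e.g. info.eventlog.add('custom-event', {'key': 'value'}) inserts a JSON-encoded row,
    # and info.eventlog.query('SELECT * FROM events') reads the rows back.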

    query('CREATE TABLE IF NOT EXISTS events (time REAL, event TEXT, data TEXT)')
    add('startup', {'version': __version__, 'pid': os.getpid(),
                    'args': sys.argv, 'cwd': os.getcwd()})
    atexit.register(shutdown)


def email(conf):
    '''Set up email service'''
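    # Illustrative gramex.yaml usage (assumed): each named entry's keys are passed straight
    # to SMTPMailer(**config), e.g. a type: and account credentials.
    #   email:
    #     default-email:
    #       type: gmail               # assumed SMTPMailer kwargs; see .emailer.SMTPMailer
    #       email: bot@example.org
    #       password: ...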

    for name, config in conf.items():
        _key = cache_key('email', config)
        if _key in _cache:
            info.email[name] = _cache[_key]
            continue
        info.email[name] = _cache[_key] = SMTPMailer(**config)


sms_notifiers = {
    'amazonsns': AmazonSNS,
    'exotel': Exotel,
    'twilio': Twilio,
}


def sms(conf):
    '''Set up SMS service'''
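    # Illustrative gramex.yaml usage (assumed): type picks the notifier class; the remaining
    # keys are passed as keyword arguments to that class.
    #   sms:
    #     default-sms:
    #       type: twilio
    #       # ... Twilio-specific credentials (constructor kwargs) go here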

    for name, config in conf.items():
        _key = cache_key('sms', config)
        if _key in _cache:
            info.sms[name] = _cache[_key]
            continue
        notifier_type = config.pop('type')
        if notifier_type not in sms_notifiers:
            raise ValueError('sms: %s: Unknown type: %s' % (name, notifier_type))
        info.sms[name] = _cache[_key] = sms_notifiers[notifier_type](**config)


def encrypt(conf):
    app_log.warning('encrypt: service deprecated.')


def test(conf):
    '''Set up test service'''
    # Remove auth: section when running gramex.
    # If there are passwords here, they will not be loaded in memory
    conf.pop('auth', None)