Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import ast 

2import six 

3import json 

4import importlib 

5import tornado.gen 

6from types import GeneratorType 

7from orderedattrdict import AttrDict 

8from gramex.config import app_log, locate, variables, CustomJSONEncoder 

9 

10 

def _arg_repr(arg):
    '''
    Render a configured argument as a snippet of Python source.

    - Strings starting with ``==`` are literal strings with one ``=`` removed:
      ``"==x"`` becomes the source for the string ``"=x"``.
    - Strings starting with a single ``=`` name a variable / expression:
      ``"=x"`` becomes the source ``x``.
    - Everything else (other strings, numbers, lists, ...) becomes its ``repr``.
    '''
    # Non-strings can never be variable references
    if not isinstance(arg, six.string_types):
        return repr(arg)
    # The escaped form must be checked before the single "=" prefix
    if arg.startswith('=='):
        return repr(arg[1:])
    if arg.startswith('='):
        return arg[1:]
    return repr(arg)

23 

24 

25def _full_name(tree): 

26 '''Decompile ast tree for "x", "module.x", "package.module.x", etc''' 

27 if isinstance(tree, ast.Name): 

28 return tree.id 

29 elif isinstance(tree, ast.Attribute): 

30 parent = _full_name(tree.value) 

31 return parent + '.' + tree.attr if parent is not None else parent 

32 return None 

33 

34 

def module_names(node, vars):
    '''
    Collects the set of modules mentioned in an AST tree. Ignores things in vars.

    The tree is searched for dotted names like ``package.module.fn``. If the
    first name is not in ``vars``, the last attribute is dropped and the longest
    remaining prefix that can be imported is added to the result. For example::

        module_names(ast.parse('json.dumps(x)'), {})        # {'json'}
        module_names(ast.parse('os.path.join(a, b)'), {})   # {'os.path'}
        module_names(ast.parse('x.y'), {'x': None})         # set()

    :arg node: ast node to search, e.g. the result of ``ast.parse(expression)``
    :arg vars: container of variable names. Dotted names starting with these
        are not treated as modules
    :return: set of module name strings
    '''
    # Stack with one entry per ancestor of the node being visited: the attribute
    # name if the ancestor is an ast.Attribute, else None. A None marks the end
    # of a dotted chain.
    context = []
    modules = set()

    def add_module(parts):
        '''Add the longest prefix of parts that identifies a module to modules'''
        while parts:
            module_name = '.'.join(parts)
            try:
                importlib.import_module(module_name)
            except ImportError:
                # Not a module. Try the parent package
                parts.pop()
            # Anything other than an ImportError means we've identified the module.
            # E.g. A SyntaxError means the file is right, it just has an error.
            # Add these modules as well.
            except Exception:
                modules.add(module_name)
                return
            else:
                modules.add(module_name)
                return

    def visit(node):
        if not hasattr(node, '_fields'):
            return
        for child in ast.iter_child_nodes(node):
            # A Name directly under an Attribute is the start of a dotted name
            if isinstance(child, ast.Name) and context and context[-1]:
                parts = [child.id]
                # Walk outwards, collecting attributes until the chain ends
                for attr in reversed(context):
                    if attr is None:
                        break
                    parts.append(attr)
                if child.id not in vars:
                    # The last attribute is the object used, not a module
                    parts.pop()
                    add_module(parts)
            context.append(child.attr if isinstance(child, ast.Attribute) else None)
            visit(child)
            context.pop()

    visit(node)
    return modules

80 

81 

def build_transform(conf, vars=None, filename='transform', cache=False, iter=True):
    '''
    Converts an expression into a callable function. For e.g.::

        function: json.dumps("x", separators: [",", ":"])

    translates to::

        fn = build_transform(conf={
            'function': 'json.dumps("x", separators: [",", ":"])',
        })

    which becomes::

        def transform(_val):
            import json
            result = json.dumps("x", separators=[",", ":"])
            return result if isinstance(result, GeneratorType) else [result,]

    The same can also be achieved via::

        function: json.dumps
        args: ["x"]
        kwargs:
            separators: [",", ":"]

    Any Python expression is also allowed. The following are valid functions::

        function: 1                  # returns 1
        function: _val + 1           # Increments the input parameter by 1
        function: json.dumps(_val)   # Returns the input as a string
        function: json.dumps         # This is the same as json.dumps(_val)

    ``build_transform`` also takes an optional ``filename=`` parameter that sets
    the "filename" of the returned function. This is useful for log messages.

    It takes an optional ``cache=True`` that permanently caches the transform.
    The default is ``False`` that re-imports the function's module if changed.

    The returned function takes a single argument called ``_val`` by default. You
    can change the arguments it accepts using ``vars``. For example::

        fn = build_transform(..., vars={'x': None, 'y': 1})

    creates::

        def transform(x=None, y=1):
            ...

    Or pass ``vars={}`` for function that does not accept any parameters.

    The returned function returns an iterable containing the values. If the
    function returns a single value, you can get it on the first iteration. If
    the function returns a generator object, that is returned as-is.

    But if ``iter=False`` is passed, the returned function just contains the
    returned value as-is -- not as a list.

    In the ``conf`` parameter, ``args`` and ``kwargs`` values are interpreted
    literally. But values starting with ``=`` like ``=args`` are treated as
    variables. (Start ``==`` to represent a string that begins with ``=``.) For
    example, when this is called with ``vars={"handler": None}``::

        function: json.dumps
        args: =handler
        kwargs:
            key: abc
            name: =handler.name

    becomes::

        def transform(handler=None):
            return json.dumps(handler, key="abc", name=handler.name)

    Raises ``ValueError`` if ``conf`` is not dict-like, or if ``function:`` is
    not a single Python expression. Raises ``KeyError`` if ``function:`` is
    missing or empty.
    '''
    # Ensure that the transform is a dict. This is a common mistake. We forget
    # the pattern: prefix
    if not hasattr(conf, 'items'):
        raise ValueError('%s: needs {function: name}. Got %s' % (filename, repr(conf)))

    # Only these keys matter. Ignore everything else in the configuration
    conf = {key: val for key, val in conf.items() if key in {'function', 'args', 'kwargs'}}

    # The returned function takes a single argument by default
    if vars is None:
        vars = {'_val': None}

    if not conf.get('function'):
        raise KeyError('%s: No function in conf %s' % (filename, conf))

    # Get the name of the function in case it's specified as a function call
    # expr is the full function / expression, e.g. six.text_type("abc")
    # tree is the ast result
    expr = conf['function']
    tree = ast.parse(expr)
    if len(tree.body) != 1 or not isinstance(tree.body[0], ast.Expr):
        # Format the message with %. Passing the values as a second argument
        # to ValueError leaves the %s placeholders unfilled.
        raise ValueError('%s: function: must be an Python function or expression, not %s' %
                         (filename, expr))

    # Check whether to use the expression as is, or construct the expression
    # If expr is like "x" or "module.x", construct it if it's callable
    # Else, use the expression as-is
    function_name = _full_name(tree.body[0].value)
    if function_name is not None:
        function = locate(function_name, modules=['gramex.transforms'])
        if function is None:
            app_log.error('%s: Cannot load function %s' % (filename, function_name))
        # This section converts the function into an expression.
        # We do this only if the original expression was a *callable* function.
        # But if we can't load the original function (e.g. SyntaxError),
        # treat that as a function as well, allowing users to correct it later.
        if callable(function) or function is None:
            if 'args' in conf:
                # If args is not a list, convert to a list with that value
                args = conf['args'] if isinstance(conf['args'], list) else [conf['args']]
            else:
                # If args is not specified, use vars' keys as args
                args = ['=%s' % var for var in vars.keys()]
            # Add the function, arguments, and kwargs. Every argument is followed
            # by ", " -- a trailing comma is valid in a Python call.
            params = ['%s, ' % _arg_repr(arg) for arg in args]
            params.extend('%s=%s, ' % (key, _arg_repr(val))
                          for key, val in conf.get('kwargs', {}).items())
            expr = '%s(%s)' % (function_name, ''.join(params))

    # Create the code. Modules are detected from the original expression tree
    modules = module_names(tree, vars)
    modulestr = ', '.join(sorted(modules))
    body = [
        'def transform(', ', '.join('{:s}={!r:}'.format(k, v) for k, v in vars.items()), '):\n',
        '\timport %s\n' % modulestr if modulestr else '',
        # Unless cached, reload the modules on every call to pick up changes
        '\treload_module(%s)\n' % modulestr if modulestr and not cache else '',
        '\tresult = %s\n' % expr,
        # If the result is a generator object, return it. Else, create a list and
        # return that. This ensures that the returned value is always an iterable
        '\treturn result if isinstance(result, GeneratorType) else [result,]' if iter else
        '\treturn result',
    ]

    # Compile the function with context variables
    import gramex.transforms
    from gramex.cache import reload_module
    context = dict(
        reload_module=reload_module,
        GeneratorType=GeneratorType,
        Return=tornado.gen.Return,
        AttrDict=AttrDict,
        **{key: getattr(gramex.transforms, key) for key in gramex.transforms.__all__}
    )
    code = compile(''.join(body), filename=filename, mode='exec')
    exec(code, context)     # nosec - OK to run arbitrary Python code in YAML

    # Return the transformed function
    function = context['transform']
    function.__name__ = str(function_name or filename)
    function.__doc__ = str(function.__doc__)

    return function

239 

240 

def condition(*args):
    '''
    DEPRECATED. Use the ``if`` construct in config keys instead.

    Returns the value paired with the first condition that holds. Arguments are
    alternating ``condition, value`` pairs, optionally followed by a default::

        variables:
            OS:
                default: 'No OS variable defined'
            PORT:
                function: condition
                args:
                    - $OS.startswith('Windows')
                    - 9991
                    - $OS.startswith('Linux')
                    - 9992
                    - 8883

    A single dict of ``{condition: value}`` may be passed instead. String
    conditions are Python expressions in which ``$VAR`` refers to a Gramex
    variable. Other conditions are tested for truth. Returns ``None`` if nothing
    matches and there is no default.
    '''
    from string import Template
    # $VAR in a condition is replaced by code that looks up the variable.
    # NOTE: eval() below sees this function's local names, so they are part of
    # the behaviour. Do not rename them.
    var_defaults = {}
    for var in variables:
        var_defaults[var] = "variables.get('%s', '')" % var
    # could use iter, range(.., 2)
    pairs = (
        args[0].items() if len(args) == 1 and isinstance(args[0], dict)
        else zip(args[0::2], args[1::2])
    )
    for cond, val in pairs:
        if not isinstance(cond, six.string_types):
            if bool(cond):
                return val
        elif eval(Template(cond).substitute(var_defaults)):    # nosec - any Python expr is OK
            return val

    # If none of the conditions matched, we'll be here.
    # If there are an odd number of arguments and there's at least one condition,
    # treat the last as a default.
    if len(args) > 2 and len(args) % 2 == 1:
        return args[-1]

280 

281 

def flattener(fields, default=None, filename='flatten'):
    '''
    Builds a function that picks values out of a nested object into a flat
    ``AttrDict``. For example::

        >>> flat = flattener({
                'id': 'id',
                'name': 'user.screen_name'
            })
        >>> flat({'id': 1, 'user': {'screen_name': 'name'}})
        {'id': 1, 'name': 'name'}

    Each key in ``fields`` is an output key. Its value says where to look::

        ''    => obj
        True  => obj
        1     => obj[1]
        'x'   => obj['x']
        'x.y' => obj['x']['y']
        '1.x' => obj[1]['x']

    If a lookup fails (KeyError, TypeError or IndexError), the value is
    ``default``, which is ``None`` unless you pass ``default=...``. Keys that are
    not strings and values that are not str / int / True are logged as errors
    and skipped. ``filename`` is used as the name of the generated function.
    '''
    lines = [
        'def %s(obj):\n' % filename,
        '\tr = AttrDict()\n',
    ]

    def emit(key, expression, guarded=False):
        '''Append source that sets r[key] = expression, optionally with a fallback'''
        key = repr(key)
        if not guarded:
            lines.append('\tr[%s] = %s\n' % (key, expression))
            return
        lines.append('\ttry: r[%s] = %s\n' % (key, expression))
        lines.append('\texcept (KeyError, TypeError, IndexError): r[%s] = default\n' % key)

    for field, source in fields.items():
        if not isinstance(field, six.string_types):
            app_log.error('flattener:%s: key %s is not a str', filename, field)
            continue
        if isinstance(source, six.string_types):
            # 'a.1.b' becomes obj['a'][1]['b']. An empty string is obj itself
            lookups = source.split('.') if source else []
            path = ''.join(
                ('[%s]' if item.isdigit() else '[%r]') % item for item in lookups)
            emit(field, 'obj' + path, guarded=True)
        elif source is True:
            emit(field, 'obj')
        elif isinstance(source, int) and not isinstance(source, bool):
            emit(field, 'obj[%d]' % source, guarded=True)
        else:
            app_log.error('flattener:%s: value %s is not a str/int', filename, source)
            continue
    lines.append('\treturn r')

    # Running the compiled code defines the function inside namespace
    namespace = {'AttrDict': AttrDict, 'default': default}
    code = compile(''.join(lines), filename='flattener:%s' % filename, mode='exec')
    eval(code, namespace)      # nosec - code constructed entirely in this function
    return namespace[filename]

340 

341 

# Holds the SqliteDict used by once() under the key 'db'. Created on first use
_once_info = {}


def once(*args, **kwargs):
    '''
    Returns True the first time it is called with a given set of positional
    arguments, and False on every later call with the same arguments.

    Calls are recorded in a SQLite dict at ``$GRAMEXDATA/once.db``. The key is
    the JSON encoding of ``args``. Pass ``_clear=True`` to forget the record for
    these arguments instead; this returns None.
    '''
    try:
        db = _once_info['db']
    except KeyError:
        # Open the store lazily, only when once() is first used
        import os
        from sqlitedict import SqliteDict
        dbpath = os.path.join(variables['GRAMEXDATA'], 'once.db')
        db = _once_info['db'] = SqliteDict(dbpath, tablename='once', autocommit=True)

    key = json.dumps(args, separators=(',', ':'), cls=CustomJSONEncoder)
    seen = key in db
    if kwargs.get('_clear', False):
        if seen:
            del db[key]
        return None
    if seen:
        return False
    db[key] = True
    return True