Coverage for gramex\transforms\transforms.py : 66%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import ast
2import six
3import json
4import importlib
5import tornado.gen
6from types import GeneratorType
7from orderedattrdict import AttrDict
8from gramex.config import app_log, locate, variables, CustomJSONEncoder
def _arg_repr(arg):
    '''
    Returns the Python source text for a configured argument.

    Non-string values, and strings without a leading ``=``, are emitted as
    literals via ``repr``. A string with a single leading ``=`` names a variable
    and is emitted bare. A leading ``==`` escapes the marker: the first ``=`` is
    dropped and the rest is emitted as a string literal. For example, ``=x`` is
    the variable ``x``, ``x`` is the string ``"x"`` and ``==x`` is the string
    ``"=x"``.
    '''
    if not isinstance(arg, six.string_types):
        return repr(arg)            # 1 becomes '1', etc
    if arg.startswith('=='):
        return repr(arg[1:])        # "==x" becomes '"=x"'
    if arg.startswith('='):
        return arg[1:]              # "=x" becomes 'x'
    return repr(arg)                # "x" becomes '"x"'
25def _full_name(tree):
26 '''Decompile ast tree for "x", "module.x", "package.module.x", etc'''
27 if isinstance(tree, ast.Name):
28 return tree.id
29 elif isinstance(tree, ast.Attribute):
30 parent = _full_name(tree.value)
31 return parent + '.' + tree.attr if parent is not None else parent
32 return None
def module_names(node, vars):
    '''
    Collects the set of modules mentioned in an AST tree. Ignores things in vars.
    For example::

        module_names(ast.parse('json.dumps(x)'), {'x': None})   # {'json'}
        module_names(ast.parse('os.path.join(a, b)'), {})       # {'os.path'}

    For every dotted name like ``package.module.attr``, the last attribute is
    dropped (it is the object being accessed, not a module). The longest
    remaining prefix that can be imported is added to the result. Dotted names
    whose first part is in ``vars`` (e.g. ``handler.name``) are skipped.

    A module that exists but raises an error on import (e.g. a SyntaxError) is
    still reported as a module -- the file is right, it just has an error.

    :arg node: AST node to scan, e.g. the result of ``ast.parse(expression)``
    :arg vars: container of variable names to ignore
    :return: set of module names (strings)
    '''
    # Stack with one entry per node on the path currently being visited:
    # the attribute name for ast.Attribute nodes, None for everything else
    context = []
    modules = set()

    def add_module(parts):
        '''Adds the longest importable prefix of the dotted name parts to modules'''
        while parts:
            module_name = '.'.join(parts)
            try:
                importlib.import_module(module_name)
            except ImportError:
                # Not a module. Try the parent package instead
                parts.pop()
                continue
            except Exception:       # noqa - deliberately broad, see below
                # Anything other than an ImportError means we've identified the module.
                # E.g. A SyntaxError means the file is right, it just has an error.
                # Add these modules as well.
                pass
            modules.add(module_name)
            return

    def visit(node):
        if not hasattr(node, '_fields'):
            return
        for child in ast.iter_child_nodes(node):
            # A Name directly inside an Attribute is the root of a dotted name
            if isinstance(child, ast.Name) and context and context[-1]:
                parts = [child.id]
                # Collect the unbroken run of enclosing attributes, innermost first
                for attr in reversed(context):
                    if attr is None:
                        break
                    parts.append(attr)
                if parts[0] not in vars:
                    parts.pop()     # The last attribute is the object used, not a module
                    add_module(parts)
            context.append(child.attr if isinstance(child, ast.Attribute) else None)
            visit(child)
            context.pop()

    visit(node)
    return modules
def build_transform(conf, vars=None, filename='transform', cache=False, iter=True):
    '''
    Converts an expression into a callable function. For e.g.::

        function: json.dumps("x", separators=[",", ":"])

    translates to::

        fn = build_transform(conf={
            'function': 'json.dumps("x", separators=[",", ":"])',
        })

    which becomes::

        def transform(_val=None):
            import json
            result = json.dumps("x", separators=[",", ":"])
            return result if isinstance(result, GeneratorType) else [result,]

    The same can also be achieved via::

        function: json.dumps
        args: ["x"]
        kwargs:
            separators: [",", ":"]

    Any Python expression is also allowed. The following are valid functions::

        function: 1                     # returns 1
        function: _val + 1              # Increments the input parameter by 1
        function: json.dumps(_val)      # Returns the input as a string
        function: json.dumps            # This is the same as json.dumps(_val)

    ``build_transform`` also takes an optional ``filename=`` parameter that sets
    the "filename" of the returned function. This is useful for log messages.

    It takes an optional ``cache=True`` that permanently caches the transform.
    The default is ``False`` that re-imports the function's module if changed.

    The returned function takes a single argument called ``_val`` by default. You
    can change the arguments it accepts using ``vars``. For example::

        fn = build_transform(..., vars={'x': None, 'y': 1})

    creates::

        def transform(x=None, y=1):
            ...

    Or pass ``vars={}`` for function that does not accept any parameters.

    The returned function returns an iterable containing the values. If the
    function returns a single value, you can get it on the first iteration. If
    the function returns a generator object, that is returned as-is.

    But if ``iter=False`` is passed, the returned function just contains the
    returned value as-is -- not as a list.

    In the ``conf`` parameter, ``args`` and ``kwargs`` values are interpreted
    literally. But values starting with ``=`` like ``=args`` are treated as
    variables. (Start ``==`` to represent a string that begins with ``=``.) For
    example, when this is called with ``vars={"handler": None}``::

        function: json.dumps
        args: =handler
        kwargs:
            key: abc
            name: =handler.name

    becomes::

        def transform(handler=None):
            return json.dumps(handler, key="abc", name=handler.name)

    Raises ``ValueError`` if ``conf`` is not dict-like, or if ``function`` is not
    a single Python expression. Raises ``KeyError`` if ``function`` is missing or
    empty.
    '''
    # Ensure that the transform is a dict. This is a common mistake. We forget
    # the pattern: prefix
    if not hasattr(conf, 'items'):
        raise ValueError('%s: needs {function: name}. Got %s' % (filename, repr(conf)))

    # Only these keys are relevant. Ignore everything else in conf
    conf = {key: val for key, val in conf.items() if key in {'function', 'args', 'kwargs'}}

    # The returned function takes a single argument by default
    if vars is None:
        vars = {'_val': None}

    if 'function' not in conf or not conf['function']:
        raise KeyError('%s: No function in conf %s' % (filename, conf))

    # Get the name of the function in case it's specified as a function call
    # expr is the full function / expression, e.g. six.text_type("abc")
    # tree is the ast result
    expr = conf['function']
    tree = ast.parse(expr)
    if len(tree.body) != 1 or not isinstance(tree.body[0], ast.Expr):
        # Interpolate with %. Passing the values as a second argument to
        # ValueError would leave the %s placeholders unfilled in the message.
        raise ValueError('%s: function: must be an Python function or expression, not %s' %
                         (filename, expr))

    # Check whether to use the expression as is, or construct the expression
    # If expr is like "x" or "module.x", construct it if it's callable
    # Else, use the expression as-is
    function_name = _full_name(tree.body[0].value)
    if function_name is not None:
        function = locate(function_name, modules=['gramex.transforms'])
        if function is None:
            app_log.error('%s: Cannot load function %s' % (filename, function_name))
        # This section converts the function into an expression.
        # We do this only if the original expression was a *callable* function.
        # But if we can't load the original function (e.g. SyntaxError),
        # treat that as a function as well, allowing users to correct it later.
        if callable(function) or function is None:
            if 'args' in conf:
                # If args is not a list, convert to a list with that value
                args = conf['args'] if isinstance(conf['args'], list) else [conf['args']]
            else:
                # If args is not specified, use vars' keys as args
                args = ['=%s' % var for var in vars.keys()]
            # Add the function, arguments, and kwargs. Every argument is followed
            # by ", " -- a trailing comma is valid in a Python call
            call = [function_name, '(']
            call.extend('%s, ' % _arg_repr(arg) for arg in args)
            call.extend('%s=%s, ' % (key, _arg_repr(val))
                        for key, val in conf.get('kwargs', {}).items())
            call.append(')')
            expr = ''.join(call)

    # Create the code
    modules = module_names(tree, vars)
    modulestr = ', '.join(sorted(modules))
    body = [
        'def transform(', ', '.join('{:s}={!r:}'.format(k, v) for k, v in vars.items()), '):\n',
        '\timport %s\n' % modulestr if modulestr else '',
        # Unless cached, reload the modules on every call to pick up changes
        '\treload_module(%s)\n' % modulestr if modulestr and not cache else '',
        '\tresult = %s\n' % expr,
        # If the result is a generator object, return it. Else, create a list and
        # return that. This ensures that the returned value is always an iterable
        '\treturn result if isinstance(result, GeneratorType) else [result,]' if iter else
        '\treturn result',
    ]

    # Compile the function with context variables.
    # These are imported here, not at the top of the file
    import gramex.transforms
    from gramex.cache import reload_module
    context = dict(
        reload_module=reload_module,
        GeneratorType=GeneratorType,
        Return=tornado.gen.Return,
        AttrDict=AttrDict,
        **{key: getattr(gramex.transforms, key) for key in gramex.transforms.__all__}
    )
    code = compile(''.join(body), filename=filename, mode='exec')
    exec(code, context)     # nosec - OK to run arbitrary Python code in YAML

    # Return the transformed function
    function = context['transform']
    function.__name__ = str(function_name or filename)
    function.__doc__ = str(function.__doc__)

    return function
def condition(*args):
    '''
    DEPRECATED. Use the ``if`` construct in config keys instead.

    Picks a value based on conditions. This lets variables be computed::

        variables:
            OS:
                default: 'No OS variable defined'
            PORT:
                function: condition
                args:
                    - $OS.startswith('Windows')
                    - 9991
                    - $OS.startswith('Linux')
                    - 9992
                    - 8883

    Arguments are (condition, value) pairs, or a single dict mapping conditions
    to values. The value for the first condition that holds is returned. String
    conditions are Python expressions in which ``$NAME`` is replaced by a lookup
    of ``NAME`` in the gramex variables (``''`` if missing). Other conditions
    are tested for truth as-is.

    If no condition holds, a trailing unpaired argument (after at least one
    pair) is returned as the default. Otherwise, None is returned.
    '''
    from string import Template

    # Map every known variable name to the expression that looks up its value
    var_defaults = {var: "variables.get('%s', '')" % var for var in variables}

    if len(args) == 1 and isinstance(args[0], dict):
        pairs = args[0].items()
    else:
        # Pair up arguments: (args[0], args[1]), (args[2], args[3]), ...
        pairs = zip(args[0::2], args[1::2])

    for cond, val in pairs:
        if isinstance(cond, six.string_types):
            cond = eval(Template(cond).substitute(var_defaults))  # nosec - any Python expr is OK
        if cond:
            return val

    # If none of the conditions matched, we'll be here.
    # If there are an odd number of arguments and there's at least one condition,
    # treat the last as a default.
    if len(args) > 2 and len(args) % 2 == 1:
        return args[-1]
def flattener(fields, default=None, filename='flatten'):
    '''
    Generates a function that flattens deep dictionaries. For example::

        >>> flat = flattener({
                'id': 'id',
                'name': 'user.screen_name'
            })
        >>> flat({'id': 1, 'user': {'screen_name': 'name'}})
        {'id': 1, 'name': 'name'}

    Each key in ``fields`` is an output field. Its value says where to find the
    field in the input object::

        ''    => obj
        True  => obj
        1     => obj[1]
        'x'   => obj['x']
        'x.y' => obj['x']['y']
        '1.x' => obj[1]['x']

    Missing values map to ``None``. You can change ``None`` to '' passing a
    ``default=''`` or any other default value.

    Keys that are not strings, and values that are not str / int / True, are
    logged as errors and skipped. ``filename`` is used as the name of the
    generated function and in log messages.
    '''
    lines = [
        'def %s(obj):\n' % filename,
        '\tr = AttrDict()\n',
    ]

    def guarded(key, target):
        '''Emit an assignment that falls back to default if the lookup fails'''
        lines.append('\ttry: r[%s] = %s\n' % (key, target))
        lines.append('\texcept (KeyError, TypeError, IndexError): r[%s] = default\n' % key)

    for field, source in fields.items():
        if not isinstance(field, six.string_types):
            app_log.error('flattener:%s: key %s is not a str', filename, field)
            continue
        key = repr(field)
        if isinstance(source, six.string_types):
            # 'x.1.y' becomes obj['x'][1]['y']. An empty string is obj itself
            parts = source.split('.') if source else []
            lookups = [('[%s]' if item.isdigit() else '[%r]') % item for item in parts]
            guarded(key, 'obj' + ''.join(lookups))
        elif source is True:
            # The whole object. This lookup cannot fail, so no guard is required
            lines.append('\tr[%s] = %s\n' % (key, 'obj'))
        elif isinstance(source, int) and not isinstance(source, bool):
            guarded(key, 'obj[%d]' % source)
        else:
            app_log.error('flattener:%s: value %s is not a str/int', filename, source)
    lines.append('\treturn r')

    code = compile(''.join(lines), filename='flattener:%s' % filename, mode='exec')
    context = {'AttrDict': AttrDict, 'default': default}
    # Running the compiled code defines the generated function inside context
    eval(code, context)     # nosec - code constructed entirely in this function
    return context[filename]
# Holds the SQLite-backed store used by once() under the 'db' key, created on first use
_once_info = {}


def once(*args, **kwargs):
    '''
    Returns True the first time it is called with a given set of positional
    arguments, and False on every later call with the same arguments.

    Pass ``_clear=True`` to forget the arguments instead. In that case, None is
    returned, and the next call with these arguments returns True again.

    Calls are recorded in a SQLite dict at ``once.db`` under the folder named by
    the ``GRAMEXDATA`` variable. Arguments are identified by their compact JSON
    encoding.
    '''
    if 'db' not in _once_info:
        # Open the store lazily, only when once() is first used
        import os
        from sqlitedict import SqliteDict
        _once_info['db'] = SqliteDict(
            os.path.join(variables['GRAMEXDATA'], 'once.db'),
            tablename='once', autocommit=True)
    db = _once_info['db']

    key = json.dumps(args, separators=(',', ':'), cls=CustomJSONEncoder)
    seen = key in db
    if kwargs.get('_clear', False):
        if seen:
            del db[key]
        return None
    if seen:
        return False
    db[key] = True
    return True