Coverage for gramex\handlers\filehandler.py : 96%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import re
2import string
3import datetime
4import mimetypes
5import tornado.web
6import tornado.gen
7from pathlib import Path
8from fnmatch import fnmatch
9from six import string_types, text_type
10from tornado.escape import utf8
11from tornado.web import HTTPError
12from collections import defaultdict
13from orderedattrdict import AttrDict
14from six.moves.urllib.parse import urljoin
15from .basehandler import BaseHandler
16from gramex.config import objectpath, app_log
17from gramex import conf as gramex_conf
18from gramex.http import FORBIDDEN, NOT_FOUND
20# Directory indices are served using this template by default
21_default_index_template = Path(__file__).absolute().parent / 'filehandler.template.html'
24def _match(path, pat):
25 '''
26 Check if path matches pattern -- case insensitively.
27 '''
28 return fnmatch(str(path).lower(), '*/' + pat.lower())
def read_template(path):
    '''
    Load the file at ``path`` as a :class:`string.Template`.

    :arg Path path: template file to read. It is decoded as UTF-8.
    :return: a ``string.Template`` built from the file contents.

    If ``path`` does not exist, a warning is logged and
    ``_default_index_template`` is read instead.
    '''
    if not path.exists():
        # Pass the values as logging arguments (as the other log calls in this
        # module do) so the message is formatted only if the record is emitted
        app_log.warning('Missing directory template "%s". Using "%s"',
                        path, _default_index_template)
        path = _default_index_template
    with path.open(encoding='utf-8') as handle:
        return string.Template(handle.read())
class FileHandler(BaseHandler):
    '''
    Serves files with transformations. It accepts these parameters:

    :arg string path: Can be one of these:

        - The filename to serve. For all files matching the pattern, this
          filename is returned.
        - The root directory from which files are served. The first parameter of
          the URL pattern is the file path under this directory. Relative paths
          are specified from where gramex was run.
        - A wildcard path where `*` is replaced by the URL pattern's first
          `(..)` group.
        - A list of files to serve. These files are concatenated and served one
          after the other.
        - A dict of {regex: path}. If the URL matches the regex, the path is
          served. The path is string formatted using the regex capture groups.

    :arg string default_filename: If the URL maps to a directory, this filename
        is displayed by default. For example, ``index.html`` or ``README.md``.
        The default is ``None``, which displays all files in the directory.
    :arg boolean index: If ``true``, shows a directory index. If ``false``,
        raises a HTTP 404: Not Found error when users try to access a directory.
    :arg list ignore: List of glob patterns to ignore. Even if the path matches
        these, the files will not be served.
    :arg list allow: List of glob patterns to allow. This overrides the ignore
        patterns, so use with care.
    :arg list methods: List of HTTP methods to allow. Defaults to
        ``GET``, ``HEAD`` and ``POST``.
    :arg string index_template: The file to be used as the template for
        displaying the index. If this file is missing, it defaults to Gramex's
        default ``filehandler.template.html``. It can use these string
        variables:

        - ``$path`` - the directory name
        - ``$body`` - an unordered list with all filenames as links
    :arg string template: Indicates that the contents of files matching this
        string pattern must be treated as a Tornado template. This is the same as
        specifying a ``function: template`` with the template string as a
        pattern. (new in Gramex 1.14).
    :arg dict headers: HTTP headers to set on the response.
    :arg dict default: Default values for the keys used to format the path when
        ``path`` is a dict. URL query arguments and named regex groups override
        these values.
    :arg dict transform: Transformations that should be applied to the files.
        The key matches a `glob pattern`_ (e.g. ``'*.md'`` or ``'data/*'``.) The
        value is a dict with the same structure as :class:`FunctionHandler`,
        and accepts these keys:

        ``encoding``
            The encoding to load the file as. If you don't specify an encoding,
            file contents are passed to ``function`` as a binary string.

        ``function``
            A string that resolves into any Python function or method (e.g.
            ``markdown.markdown``). By default, it is called with the file
            contents as ``function(content)`` and the result is rendered as-is
            (hence must be a string.)

        ``args``
            optional positional arguments to be passed to the function. By
            default, this is just ``['content']`` where ``content`` is the file
            contents. You can also pass the handler via ``['handler']``, or both
            of them in any order.

        ``kwargs``
            an optional list of keyword arguments to be passed to the function.
            A value of ``handler`` and ``content`` is replaced with the
            RequestHandler and file contents respectively.

        ``headers``
            HTTP headers to set on the response.

    .. _glob pattern: https://docs.python.org/3/library/pathlib.html#pathlib.Path.glob

    FileHandler exposes these attributes:

    - ``root``: Root path for this handler. Aligns with the ``path`` argument
    - ``path``: Absolute path requested by the user, without adding a default filename
    - ``file``: Absolute path served to the user, after adding a default filename
    '''

    @classmethod
    def setup(cls, path, default_filename=None, index=None, index_template=None,
              template=None, headers=None, default=None,
              methods=('GET', 'HEAD', 'POST'), **kwargs):
        '''
        Configure the handler class from its parameters (see the class
        docstring for their meaning). Remaining ``kwargs`` are passed to the
        parent class ``setup``.

        ``headers`` and ``default`` may be omitted or ``None``; both are then
        treated as empty. ``methods`` may be a single method name or a
        list/tuple of names.
        '''
        # Convert template: '*.html' into transform: {'*.html': {function: template}}
        # Do this before BaseHandler setup so that it can invoke the transforms required
        if template is not None:
            if template is True:
                template = '*'
            kwargs.setdefault('transform', AttrDict())[template] = AttrDict(function='template')
        super(FileHandler, cls).setup(**kwargs)

        # Exactly one of cls.root / cls.pattern is set, depending on the type of path
        cls.root, cls.pattern = None, None
        if isinstance(path, dict):
            # {regex: path}. Anchor each regex at the end of the URL
            cls.root = AttrDict([(re.compile(p + '$'), val) for p, val in path.items()])
        elif isinstance(path, list):
            cls.root = [Path(path_item).absolute() for path_item in path]
        elif '*' in path:
            cls.pattern = path
        else:
            cls.root = Path(path).absolute()
        cls.default_filename = default_filename
        cls.index = index
        cls.ignore = cls.set(cls.kwargs.ignore)
        cls.allow = cls.set(cls.kwargs.allow)
        # Use a fresh dict per class instead of a shared mutable default argument
        cls.default = {} if default is None else default
        cls.index_template = read_template(
            Path(index_template) if index_template is not None else _default_index_template)
        # Handler-specific headers override the global FileHandler headers
        cls.headers = AttrDict(objectpath(gramex_conf, 'handlers.FileHandler.headers', {}))
        if headers:
            cls.headers.update(headers)
        # Set supported methods
        for method in (methods if isinstance(methods, (tuple, list)) else [methods]):
            method = method.lower()
            setattr(cls, method, cls._head if method == 'head' else cls._get)

    @classmethod
    def set(cls, value):
        '''
        Convert value to a set of patterns. A list, set or tuple contributes
        each of its items; any other value is treated as a single item.

        Only non-empty strings are kept. Empty values and non-strings are
        dropped with a warning, so that every returned pattern can safely be
        used for glob matching.
        '''
        values = value if isinstance(value, (list, tuple, set)) else [value]
        result = set()
        for pattern in values:
            if not pattern:
                app_log.warning('%s: Ignoring empty pattern "%r"', cls.name, pattern)
            elif not isinstance(pattern, string_types):
                app_log.warning('%s: pattern "%r" is not a string. Ignoring.', cls.name, pattern)
            else:
                result.add(pattern)
        return result

    @tornado.gen.coroutine
    def _head(self, *args, **kwargs):
        '''Handle the request like ``_get``, but without writing the body.'''
        kwargs['include_body'] = False
        yield self._get(*args, **kwargs)

    @tornado.gen.coroutine
    def _get(self, *args, **kwargs):
        '''
        Map the request to one or more file paths, based on how ``path`` was
        configured in ``setup``, and serve them via ``_get_path``.

        ``args[0]``, if present, is the first URL pattern group. The keyword
        argument ``include_body`` (default ``True``) controls whether the
        response body is written.

        Raises HTTP 404 if ``path`` is a dict and no regex matches the URL.
        '''
        self.include_body = kwargs.pop('include_body', True)
        path = urljoin('/', args[0] if args else '').lstrip('/')
        if isinstance(self.root, list):
            # Concatenate multiple files and serve them one after another
            for path_item in self.root:
                yield self._get_path(path_item, multipart=True)
        elif isinstance(self.root, dict):
            # Render path for the first matching regex
            for pattern, filestr in self.root.items():
                match = pattern.match(path)
                if match:
                    # Format keys, lowest to highest priority: configured defaults,
                    # first value of each URL query argument, named regex groups
                    q = defaultdict(text_type, **self.default)
                    q.update({k: v[0] for k, v in self.args.items() if len(v) > 0})
                    q.update(match.groupdict())
                    # filestr.format(**q) would copy q into a plain dict, so a missing
                    # key would raise KeyError. vformat looks keys up on q itself, so
                    # the defaultdict supplies an empty string for missing keys.
                    target = string.Formatter().vformat(filestr, match.groups(), q)
                    p = Path(target).absolute()
                    app_log.debug('%s: %s renders %s', self.name, self.request.path, p)
                    yield self._get_path(p)
                    break
            else:
                raise HTTPError(NOT_FOUND, '%s matches no path key', self.request.path)
        elif not args:
            # No group has been specified in the pattern. So just serve root
            yield self._get_path(self.root)
        else:
            # Eliminate parent directory references like `../` in the URL
            path = urljoin('/', path)[1:]
            if self.pattern:
                yield self._get_path(Path(self.pattern.replace('*', path)).absolute())
            else:
                yield self._get_path(self.root / path if self.root.is_dir() else self.root)

    def allowed(self, path):
        '''
        A path is allowed if it matches any allow:, or matches no ignore:.
        Override this method for a custom implementation.
        '''
        for ignore in self.ignore:
            if not _match(path, ignore):
                continue
            # Check allows only if an ignore: is matched.
            # If any allow: is matched, allow it
            if any(_match(path, allow) for allow in self.allow):
                return True
            app_log.debug('%s: Disallow "%s". It matches "%s"', self.name, path, ignore)
            return False
        return True

    @tornado.gen.coroutine
    def _get_path(self, path, multipart=False):
        '''
        Serve a single path: either a directory index or a file.

        :arg Path path: the path to serve.
        :arg bool multipart: if true, flush after writing the content. Used
            when several files are written to the same response.

        Sets ``self.path``, ``self.file`` and ``self.content``. Raises HTTP 404
        if the path is missing (or is a directory with neither a default file
        nor an index), and HTTP 403 if it is not a regular file or is not
        ``allowed``.
        '''
        # If the file doesn't exist, raise a 404: Not Found
        try:
            path = path.resolve()
        except OSError:
            raise HTTPError(NOT_FOUND, '%s missing', path)

        self.path = path
        if self.path.is_dir():
            self.file = self.path / self.default_filename if self.default_filename else self.path
            if not (self.default_filename and self.file.exists()) and not self.index:
                raise HTTPError(NOT_FOUND, '%s missing index', self.file)
            # Ensure URL has a trailing '/' when displaying the index / default file
            if not self.request.path.endswith('/'):
                suffix = '/?' + self.request.query if self.request.query else '/'
                self.redirect(self.request.path + suffix, permanent=True)
                return
        else:
            self.file = self.path
            if not self.file.exists():
                raise HTTPError(NOT_FOUND, '%s missing', self.file)
            elif not self.file.is_file():
                raise HTTPError(FORBIDDEN, '%s is not a file', self.path)

        if not self.allowed(self.file):
            raise HTTPError(FORBIDDEN, '%s not permitted', self.file)

        if self.path.is_dir() and self.index and not (
                self.default_filename and self.file.exists()):
            # Directory without a usable default file: render the index
            self.set_header('Content-Type', 'text/html; charset=UTF-8')
            content = []
            file_template = string.Template(u'<li><a href="$path">$name</a></li>')
            for entry in self.path.iterdir():
                if entry.is_symlink():
                    name_suffix, path_suffix = ' ►', ''
                elif entry.is_dir():
                    name_suffix = path_suffix = '/'
                else:
                    name_suffix = path_suffix = ''
                # On Windows, pathlib on Python 2.7 won't handle Unicode. Ignore such files.
                # https://bitbucket.org/pitrou/pathlib/issues/25
                try:
                    # NOTE(review): names are inserted into the HTML without escaping
                    entry = str(entry.relative_to(self.path))
                    content.append(file_template.substitute(
                        path=entry + path_suffix,
                        name=entry + name_suffix,
                    ))
                except UnicodeDecodeError:
                    app_log.warning("FileHandler can't show unicode file {!r:s}".format(entry))
            # NOTE(review): only the closing tag is added here; presumably the index
            # template supplies the opening <ul> -- confirm against the template
            content.append(u'</ul>')
            self.content = self.index_template.substitute(path=self.path, body=''.join(content))

        else:
            modified = self.file.stat().st_mtime
            self.set_header('Last-Modified', datetime.datetime.utcfromtimestamp(modified))

            mime_type = mimetypes.types_map.get(self.file.suffix.lower())
            if mime_type is not None:
                if mime_type.startswith('text/'):
                    mime_type += '; charset=UTF-8'
                self.set_header('Content-Type', mime_type)

            # A dict value means {header: value} for files matching the glob in the key.
            # Any other value is a header that applies to every file.
            for header_name, header_value in self.headers.items():
                if isinstance(header_value, dict):
                    if _match(self.file, header_name):
                        for name, value in header_value.items():
                            self.set_header(name, value)
                else:
                    self.set_header(header_name, header_value)

            # Use the first transform whose pattern matches the file
            transform = {}
            for pattern, trans in self.transform.items():
                if _match(self.file, pattern):
                    transform = trans
                    break

            # Without an encoding, the file is read as bytes
            encoding = transform.get('encoding')
            with self.file.open('rb' if encoding is None else 'r', encoding=encoding) as file:
                self.content = file.read()
            if transform:
                for header_name, header_value in transform['headers'].items():
                    self.set_header(header_name, header_value)

                # The transform yields items. Futures are resolved before joining.
                # NOTE(review): tornado.concurrent is not imported explicitly above
                output = []
                for item in transform['function'](content=self.content, handler=self):
                    if tornado.concurrent.is_future(item):
                        item = yield item
                    output.append(item)
                self.content = ''.join(output)
            self.set_header('Content-Length', len(utf8(self.content)))

        if self.include_body:
            self.write(self.content)
            # Do not flush unless it's multipart. Flushing disables Etag
            if multipart:
                self.flush()