Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import re 

2import string 

3import datetime 

4import mimetypes 

5import tornado.web 

6import tornado.gen 

7from pathlib import Path 

8from fnmatch import fnmatch 

9from six import string_types, text_type 

10from tornado.escape import utf8 

11from tornado.web import HTTPError 

12from collections import defaultdict 

13from orderedattrdict import AttrDict 

14from six.moves.urllib.parse import urljoin 

15from .basehandler import BaseHandler 

16from gramex.config import objectpath, app_log 

17from gramex import conf as gramex_conf 

18from gramex.http import FORBIDDEN, NOT_FOUND 

19 

# Directory indices are served using this template by default.
# It is looked up in the same directory as this module, and is also the
# fallback used by read_template() when a configured template is missing.
_default_index_template = Path(__file__).absolute().parent / 'filehandler.template.html'

22 

23 

24def _match(path, pat): 

25 ''' 

26 Check if path matches pattern -- case insensitively. 

27 ''' 

28 return fnmatch(str(path).lower(), '*/' + pat.lower()) 

29 

30 

def read_template(path):
    '''
    Load a directory-index template and return it as a ``string.Template``.

    :arg Path path: template file to read. If it does not exist, a warning is
        logged and the default index template is read instead.

    The file is decoded as UTF-8.
    '''
    source = path
    if not source.exists():
        app_log.warning('Missing directory template "%s". Using "%s"' %
                        (path, _default_index_template))
        source = _default_index_template
    with source.open(encoding='utf-8') as handle:
        text = handle.read()
    return string.Template(text)

38 

39 

40class FileHandler(BaseHandler): 

41 ''' 

42 Serves files with transformations. It accepts these parameters: 

43 

44 :arg string path: Can be one of these: 

45 

46 - The filename to serve. For all files matching the pattern, this 

47 filename is returned. 

48 - The root directory from which files are served. The first parameter of 

49 the URL pattern is the file path under this directory. Relative paths 

50 are specified from where gramex was run. 

51 - A wildcard path where `*` is replaced by the URL pattern's first 

52 `(..)` group. 

53 - A list of files to serve. These files are concatenated and served one 

54 after the other. 

55 - A dict of {regex: path}. If the URL matches the regex, the path is 

56 served. The path is string formatted using the regex capture groups 

57 

58 :arg string default_filename: If the URL maps to a directory, this filename 

59 is displayed by default. For example, ``index.html`` or ``README.md``. 

60 The default is ``None``, which displays all files in the directory. 

61 :arg boolean index: If ``true``, shows a directory index. If ``false``, 

62 raises a HTTP 404: Not Found error when users try to access a directory. 

63 :arg list ignore: List of glob patterns to ignore. Even if the path matches 

64 these, the files will not be served. 

65 :arg list allow: List of glob patterns to allow. This overrides the ignore 

66 patterns, so use with care. 

67 :arg list methods: List of HTTP methods to allow. Defaults to 

68 `['GET', 'HEAD', 'POST']`. 

69 :arg string index_template: The file to be used as the template for 

70 displaying the index. If this file is missing, it defaults to Gramex's 

71 default ``filehandler.template.html``. It can use these string 

72 variables: 

73 

74 - ``$path`` - the directory name 

75 - ``$body`` - an unordered list with all filenames as links 

76 :arg string template: Indicates that the contents of files matching this 

77 string pattern must be treated as a Tornado template. This is the same as 

78 specifying a ``function: template`` with the template string as a 

79 pattern. (new in Gramex 1.14). 

80 :arg dict headers: HTTP headers to set on the response. 

81 :arg dict transform: Transformations that should be applied to the files. 

82 The key matches a `glob pattern`_ (e.g. ``'*.md'`` or ``'data/*'``.) The 

83 value is a dict with the same structure as :class:`FunctionHandler`, 

84 and accepts these keys: 

85 

86 ``encoding`` 

87 The encoding to load the file as. If you don't specify an encoding, 

88 file contents are passed to ``function`` as a binary string. 

89 

90 ``function`` 

91 A string that resolves into any Python function or method (e.g. 

92 ``markdown.markdown``). By default, it is called with the file 

93 contents as ``function(content)`` and the result is rendered as-is 

94 (hence must be a string.) 

95 

96 ``args`` 

97 optional positional arguments to be passed to the function. By 

98 default, this is just ``['content']`` where ``content`` is the file 

99 contents. You can also pass the handler via ``['handler']``, or both 

100 of them in any order. 

101 

102 ``kwargs``: 

103 an optional list of keyword arguments to be passed to the function. 

104 A value of ``handler`` and ``content`` is replaced with the 

105 RequestHandler and file contents respectively. 

106 

107 ``headers``: 

108 HTTP headers to set on the response. 

109 

110 .. _glob pattern: https://docs.python.org/3/library/pathlib.html#pathlib.Path.glob 

111 

112 FileHandler exposes these attributes: 

113 

114 - ``root``: Root path for this handler. Aligns with the ``path`` argument 

115 - ``path``: Absolute path requested by the user, without adding a default filename 

116 - ``file``: Absolute path served to the user, after adding a default filename 

117 ''' 

118 

119 @classmethod 

120 def setup(cls, path, default_filename=None, index=None, index_template=None, 

121 template=None, headers={}, default={}, methods=['GET', 'HEAD', 'POST'], **kwargs): 

122 # Convert template: '*.html' into transform: {'*.html': {function: template}} 

123 # Do this before BaseHandler setup so that it can invoke the transforms required 

124 if template is not None: 

125 if template is True: 

126 template = '*' 

127 kwargs.setdefault('transform', AttrDict())[template] = AttrDict(function='template') 

128 super(FileHandler, cls).setup(**kwargs) 

129 

130 cls.root, cls.pattern = None, None 

131 if isinstance(path, dict): 

132 cls.root = AttrDict([(re.compile(p + '$'), val) for p, val in path.items()]) 

133 elif isinstance(path, list): 

134 cls.root = [Path(path_item).absolute() for path_item in path] 

135 elif '*' in path: 

136 cls.pattern = path 

137 else: 

138 cls.root = Path(path).absolute() 

139 cls.default_filename = default_filename 

140 cls.index = index 

141 cls.ignore = cls.set(cls.kwargs.ignore) 

142 cls.allow = cls.set(cls.kwargs.allow) 

143 cls.default = default 

144 cls.index_template = read_template( 

145 Path(index_template) if index_template is not None else _default_index_template) 

146 cls.headers = AttrDict(objectpath(gramex_conf, 'handlers.FileHandler.headers', {})) 

147 cls.headers.update(headers) 

148 # Set supported methods 

149 for method in (methods if isinstance(methods, (tuple, list)) else [methods]): 

150 method = method.lower() 

151 setattr(cls, method, cls._head if method == 'head' else cls._get) 

152 

153 @classmethod 

154 def set(cls, value): 

155 ''' 

156 Convert value to a set. If value is already a list, set, tuple, return as is. 

157 Ensure that the values are non-empty strings. 

158 ''' 

159 result = set(value) if isinstance(value, (list, tuple, set)) else set([value]) 

160 for pattern in result: 

161 if not pattern: 161 ↛ 162line 161 didn't jump to line 162, because the condition on line 161 was never true

162 app_log.warning('%s: Ignoring empty pattern "%r"', cls.name, pattern) 

163 elif not isinstance(pattern, string_types): 163 ↛ 164line 163 didn't jump to line 164, because the condition on line 163 was never true

164 app_log.warning('%s: pattern "%r" is not a string. Ignoring.', cls.name, pattern) 

165 result.add(pattern) 

166 return result 

167 

168 @tornado.gen.coroutine 

169 def _head(self, *args, **kwargs): 

170 kwargs['include_body'] = False 

171 yield self._get(*args, **kwargs) 

172 

173 @tornado.gen.coroutine 

174 def _get(self, *args, **kwargs): 

175 self.include_body = kwargs.pop('include_body', True) 

176 path = urljoin('/', args[0] if len(args) else '').lstrip('/') 

177 if isinstance(self.root, list): 

178 # Concatenate multiple files and serve them one after another 

179 for path_item in self.root: 

180 yield self._get_path(path_item, multipart=True) 

181 elif isinstance(self.root, dict): 

182 # Render path for the the first matching regex 

183 for pattern, filestr in self.root.items(): 183 ↛ 194line 183 didn't jump to line 194, because the loop on line 183 didn't complete

184 match = pattern.match(path) 

185 if match: 

186 q = defaultdict(text_type, **self.default) 

187 q.update({k: v[0] for k, v in self.args.items() if len(v) > 0}) 

188 q.update(match.groupdict()) 

189 p = Path(filestr.format(*match.groups(), **q)).absolute() 

190 app_log.debug('%s: %s renders %s', self.name, self.request.path, p) 

191 yield self._get_path(p) 

192 break 

193 else: 

194 raise HTTPError(NOT_FOUND, '%s matches no path key', self.request.path) 

195 elif not args: 

196 # No group has been specified in the pattern. So just serve root 

197 yield self._get_path(self.root) 

198 else: 

199 # Eliminate parent directory references like `../` in the URL 

200 path = urljoin('/', path)[1:] 

201 if self.pattern: 

202 yield self._get_path(Path(self.pattern.replace('*', path)).absolute()) 

203 else: 

204 yield self._get_path(self.root / path if self.root.is_dir() else self.root) 

205 

206 def allowed(self, path): 

207 ''' 

208 A path is allowed if it matches any allow:, or matches no ignore:. 

209 Override this method for a custom implementation. 

210 ''' 

211 for ignore in self.ignore: 

212 if _match(path, ignore): 

213 # Check allows only if an ignore: is matched. 

214 # If any allow: is matched, allow it 

215 for allow in self.allow: 

216 if _match(path, allow): 

217 return True 

218 app_log.debug('%s: Disallow "%s". It matches "%s"', self.name, path, ignore) 

219 return False 

220 return True 

221 

222 @tornado.gen.coroutine 

223 def _get_path(self, path, multipart=False): 

224 # If the file doesn't exist, raise a 404: Not Found 

225 try: 

226 path = path.resolve() 

227 except OSError: 

228 raise HTTPError(NOT_FOUND, '%s missing', path) 

229 

230 self.path = path 

231 if self.path.is_dir(): 

232 self.file = self.path / self.default_filename if self.default_filename else self.path 

233 if not (self.default_filename and self.file.exists()) and not self.index: 

234 raise HTTPError(NOT_FOUND, '%s missing index', self.file) 

235 # Ensure URL has a trailing '/' when displaying the index / default file 

236 if not self.request.path.endswith('/'): 

237 suffix = '/?' + self.request.query if self.request.query else '/' 

238 self.redirect(self.request.path + suffix, permanent=True) 

239 return 

240 else: 

241 self.file = self.path 

242 if not self.file.exists(): 

243 raise HTTPError(NOT_FOUND, '%s missing', self.file) 

244 elif not self.file.is_file(): 244 ↛ 245line 244 didn't jump to line 245, because the condition on line 244 was never true

245 raise HTTPError(FORBIDDEN, '%s is not a file', self.path) 

246 

247 if not self.allowed(self.file): 

248 raise HTTPError(FORBIDDEN, '%s not permitted', self.file) 

249 

250 if self.path.is_dir() and self.index and not ( 

251 self.default_filename and self.file.exists()): 

252 self.set_header('Content-Type', 'text/html; charset=UTF-8') 

253 content = [] 

254 file_template = string.Template(u'<li><a href="$path">$name</a></li>') 

255 for path in self.path.iterdir(): 

256 if path.is_symlink(): 256 ↛ 257line 256 didn't jump to line 257, because the condition on line 256 was never true

257 name_suffix, path_suffix = ' &#x25ba;', '' 

258 elif path.is_dir(): 

259 name_suffix = path_suffix = '/' 

260 else: 

261 name_suffix = path_suffix = '' 

262 # On Windows, pathlib on Python 2.7 won't handle Unicode. Ignore such files. 

263 # https://bitbucket.org/pitrou/pathlib/issues/25 

264 try: 

265 path = str(path.relative_to(self.path)) 

266 content.append(file_template.substitute( 

267 path=path + path_suffix, 

268 name=path + name_suffix, 

269 )) 

270 except UnicodeDecodeError: 

271 app_log.warning("FileHandler can't show unicode file {!r:s}".format(path)) 

272 content.append(u'</ul>') 

273 self.content = self.index_template.substitute(path=self.path, body=''.join(content)) 

274 

275 else: 

276 modified = self.file.stat().st_mtime 

277 self.set_header('Last-Modified', datetime.datetime.utcfromtimestamp(modified)) 

278 

279 mime_type = mimetypes.types_map.get(self.file.suffix.lower()) 

280 if mime_type is not None: 

281 if mime_type.startswith('text/'): 

282 mime_type += '; charset=UTF-8' 

283 self.set_header('Content-Type', mime_type) 

284 

285 for header_name, header_value in self.headers.items(): 

286 if isinstance(header_value, dict): 

287 if _match(self.file, header_name): 

288 for header_name, header_value in header_value.items(): 

289 self.set_header(header_name, header_value) 

290 else: 

291 self.set_header(header_name, header_value) 

292 

293 transform = {} 

294 for pattern, trans in self.transform.items(): 

295 if _match(self.file, pattern): 

296 transform = trans 

297 break 

298 

299 encoding = transform.get('encoding') 

300 with self.file.open('rb' if encoding is None else 'r', encoding=encoding) as file: 

301 self.content = file.read() 

302 if transform: 

303 for header_name, header_value in transform['headers'].items(): 

304 self.set_header(header_name, header_value) 

305 

306 output = [] 

307 for item in transform['function'](content=self.content, handler=self): 

308 if tornado.concurrent.is_future(item): 

309 item = yield item 

310 output.append(item) 

311 self.content = ''.join(output) 

312 self.set_header('Content-Length', len(utf8(self.content))) 

313 

314 if self.include_body: 

315 self.write(self.content) 

316 # Do not flush unless it's multipart. Flushing disables Etag 

317 if multipart: 

318 self.flush()