Coverage for gramex\handlers\proxyhandler.py : 86%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import six
2import tornado.web
3import tornado.gen
4from six.moves.urllib_parse import urlsplit, urlunsplit, parse_qs, urlencode
5from tornado.httputil import HTTPHeaders
6from tornado.httpclient import AsyncHTTPClient, HTTPRequest
7from gramex.transforms import build_transform
8from gramex.config import app_log
9from gramex.http import MOVED_PERMANENTLY, FOUND
10from .basehandler import BaseHandler
class ProxyHandler(BaseHandler):
    '''
    Passes the request to another HTTP REST API endpoint and returns its
    response. This is useful when:

    - exposing another website but via Gramex authentication (e.g. R-Shiny apps)
    - a server-side REST API must be accessed via the browser (e.g. Twitter)
    - passing requests to an API that requires authentication (e.g. Google)
    - the request or response needs to be transformed (e.g. add sentiment)
    - caching is required on the API (e.g. cache for 10 min)

    :arg string url: URL endpoint to forward to. If the pattern ends with
        ``(.*)``, that part is added to this url.
    :arg dict request_headers: HTTP headers to be passed to the url.
        - ``"*": true`` forwards all HTTP headers from the request as-is.
        - A value of ``true`` forwards this header from the request as-is.
        - Any string value is formatted with ``handler`` as a variable.
    :arg dict default: Default URL query parameters
    :arg dict headers: HTTP headers to set on the response
    :arg list methods: list of HTTP methods allowed (default: [GET, HEAD, POST])
    :arg function prepare: A function that accepts any of ``handler`` and ``request``
        (a tornado.httpclient.HTTPRequest) and modifies the ``request`` in-place
    :arg function modify: A function that accepts any of ``handler``, ``request``
        and ``response`` (tornado.httpclient.HTTPResponse) and modifies the
        ``response`` in-place
    :arg int connect_timeout: Timeout for initial connection in seconds (default: 20)
    :arg int request_timeout: Timeout for entire request in seconds (default: 20)

    Example YAML configuration::

        pattern: /gmail/(.*)
        handler: ProxyHandler
        kwargs:
            url: https://www.googleapis.com/gmail/v1/
            request_headers:
                "*": true           # Pass on all HTTP headers
                Cookie: true        # Pass on the Cookie HTTP header
                # Over-ride the Authorization header
                Authorization: 'Bearer {handler.session[google_access_token]}'
            default:
                alt: json

    The response has the same HTTP headers and body as the proxied request, but:

    - Connection and Transfer-Encoding headers are ignored
    - Content-Length is recomputed from the body that is actually returned
    - ``X-Proxy-Url:`` header has the final URL that responded (after redirects)

    These headers can be over-ridden by the ``headers:`` section.
    '''
    @classmethod
    def setup(cls, url, request_headers=None, default=None, prepare=None, modify=None,
              headers=None, methods=None,
              connect_timeout=20, request_timeout=20, **kwargs):
        '''
        Store the proxy configuration on the handler class and bind the
        allowed HTTP verbs to :meth:`method`.

        Arguments are described in the class docstring. ``request_headers``,
        ``default`` and ``headers`` default to empty dicts, and ``methods``
        defaults to GET, HEAD and POST. ``None`` is used as the sentinel so
        that handler classes never share one mutable default object.
        Remaining ``kwargs`` are passed to the parent ``setup``.
        '''
        super(ProxyHandler, cls).setup(**kwargs)
        cls.url = url
        cls.request_headers = {} if request_headers is None else request_headers
        cls.default = {} if default is None else default
        cls.headers = {} if headers is None else headers
        cls.connect_timeout, cls.request_timeout = connect_timeout, request_timeout
        if methods is None:
            methods = ('GET', 'HEAD', 'POST')

        # Compile the optional prepare / modify hooks into callables that
        # accept handler=, request= and response= keyword arguments
        cls.info = {}
        for key, fn in (('prepare', prepare), ('modify', modify)):
            if fn:
                cls.info[key] = build_transform(
                    {'function': fn}, filename='url:%s.%s' % (cls.name, key),
                    vars={'handler': None, 'request': None, 'response': None})
        cls.browser = AsyncHTTPClient()
        # Every allowed HTTP verb is served by the same coroutine
        for method in methods:
            setattr(cls, method.lower(), cls.method)

    @tornado.gen.coroutine
    def method(self, *path_args):
        '''
        Forward the current request to the configured url and relay the
        upstream status, headers and body back to the client.

        :arg path_args: positional arguments received by the handler. They
            are substituted into the configured ``url`` via ``str.format``.
        '''
        # Construct HTTP headers
        forward_all = self.request_headers.get('*', None)
        headers = HTTPHeaders(self.request.headers if forward_all else {})
        for key, val in self.request_headers.items():
            if key == '*':
                continue
            if val is True:
                # Forward the header as-is, but only if the client sent it
                if key in self.request.headers:
                    headers[key] = self.request.headers[key]
            else:
                headers[key] = six.text_type(val).format(handler=self)

        # Update query parameters
        # TODO: use a named capture for path_args? This is not the right method
        parts = urlsplit(self.url.format(*path_args))
        params = {
            key: ([six.text_type(v).format(handler=self) for v in val] if isinstance(val, list)
                  else six.text_type(val).format(handler=self))
            for key, val in self.default.items()
        }
        # Precedence (lowest to highest): defaults, query in the url, request arguments
        params.update(parse_qs(parts.query))
        params.update(self.args)
        query = urlencode(params, doseq=True)
        url = urlunsplit((parts.scheme, parts.netloc, parts.path, query, parts.fragment))

        # Tornado's HTTP clients reject a non-None body for methods that do
        # not expect one. A body-less incoming request has an empty (not
        # None) body, so normalise it here. POST, PUT and PATCH must keep
        # even an empty body.
        http_method = self.request.method
        if http_method in ('GET', 'HEAD'):
            body = None
        elif http_method in ('POST', 'PUT', 'PATCH'):
            body = self.request.body
        else:
            body = self.request.body or None

        request = HTTPRequest(
            url=url,
            method=http_method,
            headers=headers,
            body=body,
            connect_timeout=self.connect_timeout,
            request_timeout=self.request_timeout,
        )

        if 'prepare' in self.info:
            self.info['prepare'](handler=self, request=request, response=None)

        app_log.debug('%s: proxying %s', self.name, url)
        # raise_error=False: HTTP error responses are relayed, not raised
        response = yield self.browser.fetch(request, raise_error=False)

        if response.code in (MOVED_PERMANENTLY, FOUND):
            location = response.headers.get('Location', '')
            # TODO: check if Location: header MATCHES the url, not startswith
            # url: example.org/?x should match Location: example.org/?a=1&x
            # even though location does not start with url.
            if location.startswith(url):
                # Swap the proxied url prefix (the variable, not the literal
                # string 'url') for this handler's pattern
                response.headers['Location'] = location.replace(url, self.conf.pattern, 1)

        if 'modify' in self.info:
            self.info['modify'](handler=self, request=request, response=response)

        # Pass on the headers as-is, but override with the handler HTTP headers
        self.set_header('X-Proxy-Url', response.effective_url)
        for header_name, header_value in response.headers.items():
            if header_name not in {'Connection', 'Transfer-Encoding', 'Content-Length'}:
                self.set_header(header_name, header_value)
        # Proxies may send the wrong Content-Length. Correct it, else Tornado raises an error
        if response.body is not None:
            self.set_header('Content-Length', len(response.body))
        for header_name, header_value in self.headers.items():
            self.set_header(header_name, header_value)
        # Pass on HTTP status code and response body as-is
        self.set_status(response.code, reason=response.reason)
        if response.body is not None:
            self.write(response.body)