Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import six 

2import tornado.web 

3import tornado.gen 

4from six.moves.urllib_parse import urlsplit, urlunsplit, parse_qs, urlencode 

5from tornado.httputil import HTTPHeaders 

6from tornado.httpclient import AsyncHTTPClient, HTTPRequest 

7from gramex.transforms import build_transform 

8from gramex.config import app_log 

9from gramex.http import MOVED_PERMANENTLY, FOUND 

10from .basehandler import BaseHandler 

11 

12 

class ProxyHandler(BaseHandler):
    '''
    Passes the request to another HTTP REST API endpoint and returns its
    response. This is useful when:

    - exposing another website but via Gramex authentication (e.g. R-Shiny apps)
    - a server-side REST API must be accessed via the browser (e.g. Twitter)
    - passing requests to an API that requires authentication (e.g. Google)
    - the request or response needs to be transformed (e.g. add sentiment)
    - caching is required on the API (e.g. cache for 10 min)

    :arg string url: URL endpoint to forward to. Groups captured by the URL
        pattern (e.g. ``(.*)``) are passed positionally to ``url.format()``,
        so use ``{0}``, ``{1}``, ... to place them in the url.
    :arg dict request_headers: HTTP headers to be passed to the url.
        - ``"*": true`` forwards all HTTP headers from the request as-is.
        - A value of ``true`` forwards this header from the request as-is.
        - Any string value is formatted with ``handler`` as a variable.
    :arg dict default: Default URL query parameters. Values (or each item of a
        list value) are formatted with ``handler`` as a variable. Query
        parameters in ``url`` and in the incoming request override these.
    :arg dict headers: HTTP headers to set on the response
    :arg list methods: list of HTTP methods allowed (default: [GET, HEAD, POST])
    :arg function prepare: A function that accepts any of ``handler`` and ``request``
        (a tornado.httpclient.HTTPRequest) and modifies the ``request`` in-place
    :arg function modify: A function that accepts any of ``handler``, ``request``
        and ``response`` (tornado.httpclient.HTTPResponse) and modifies the
        ``response`` in-place
    :arg int connect_timeout: Timeout for initial connection in seconds (default: 20)
    :arg int request_timeout: Timeout for entire request in seconds (default: 20)

    Example YAML configuration::

        pattern: /gmail/(.*)
        handler: ProxyHandler
        kwargs:
            url: https://www.googleapis.com/gmail/v1/{0}
            request_headers:
              "*": true           # Pass on all HTTP headers
              Cookie: true        # Pass on the Cookie HTTP header
              # Over-ride the Authorization header
              Authorization: 'Bearer {handler.session[google_access_token]}'
            default:
              alt: json

    The response has the same HTTP headers and body as the proxied request, but:

    - Connection and Transfer-Encoding headers are ignored
    - Content-Length is recomputed from the proxied response body
    - ``X-Proxy-Url:`` header has the final URL that responded (after redirects)

    These headers can be over-ridden by the ``headers:`` section.
    '''
    # HTTP methods for which an (even empty) request body is forwarded as-is
    _body_methods = ('POST', 'PUT', 'PATCH')
    # Response headers that are never copied from the proxied response
    _skip_response_headers = frozenset(['Connection', 'Transfer-Encoding', 'Content-Length'])

    @classmethod
    def setup(cls, url, request_headers=None, default=None, prepare=None, modify=None,
              headers=None, methods=None,
              connect_timeout=20, request_timeout=20, **kwargs):
        '''
        Configure the handler class from the ``kwargs:`` of the URL config.

        Stores the configuration on the class, compiles the optional
        ``prepare`` / ``modify`` transforms, and binds :meth:`method` to every
        HTTP method listed in ``methods``. Remaining ``kwargs`` are passed to
        the parent ``setup``.
        '''
        super(ProxyHandler, cls).setup(**kwargs)
        cls.url = url
        # Use None sentinels instead of mutable defaults, so that one handler
        # class never shares (and cannot accidentally mutate) another's dict.
        cls.request_headers = {} if request_headers is None else request_headers
        cls.default = {} if default is None else default
        cls.headers = {} if headers is None else headers
        if methods is None:
            methods = ['GET', 'HEAD', 'POST']
        cls.connect_timeout, cls.request_timeout = connect_timeout, request_timeout
        cls.info = {}
        for key, fn in (('prepare', prepare), ('modify', modify)):
            if fn:
                cls.info[key] = build_transform(
                    {'function': fn}, filename='url:%s.%s' % (cls.name, key),
                    vars={'handler': None, 'request': None, 'response': None})
        cls.browser = AsyncHTTPClient()
        # Every allowed HTTP verb is served by the same coroutine
        for method in methods:
            setattr(cls, method.lower(), cls.method)

    @tornado.gen.coroutine
    def method(self, *path_args):
        '''
        Forward the current request to the configured url and relay the response.

        :arg path_args: groups captured by the URL pattern. These are
            substituted into the configured url via ``str.format``.
        '''
        # Construct HTTP headers
        forward_all = self.request_headers.get('*', None)
        headers = HTTPHeaders(self.request.headers if forward_all else {})
        for key, val in self.request_headers.items():
            if key == '*':
                continue
            if val is True:
                # Forward the header as-is, but only if the client sent it
                if key in self.request.headers:
                    headers[key] = self.request.headers[key]
            else:
                headers[key] = six.text_type(val).format(handler=self)

        # Update query parameters
        # TODO: use a named capture for path_args? This is not the right method
        parts = urlsplit(self.url.format(*path_args))
        params = {
            key: ([six.text_type(v).format(handler=self) for v in val] if isinstance(val, list)
                  else six.text_type(val).format(handler=self))
            for key, val in self.default.items()
        }
        # Precedence (lowest to highest): defaults, query in the url, request arguments
        params.update(parse_qs(parts.query))
        params.update(self.args)
        query = urlencode(params, doseq=True)
        url = urlunsplit((parts.scheme, parts.netloc, parts.path, query, parts.fragment))

        # Tornado's HTTP clients reject a non-None body for methods that do not
        # expect one. The incoming request's body is b'' (not None) when absent,
        # so send None for GET, and for an empty body on HEAD, DELETE, etc.
        http_method = self.request.method
        body = self.request.body
        if http_method == 'GET' or (http_method not in self._body_methods and not body):
            body = None

        request = HTTPRequest(
            url=url,
            method=http_method,
            headers=headers,
            body=body,
            connect_timeout=self.connect_timeout,
            request_timeout=self.request_timeout,
        )

        if 'prepare' in self.info:
            self.info['prepare'](handler=self, request=request, response=None)

        app_log.debug('%s: proxying %s', self.name, url)
        # raise_error=False: HTTP error responses are relayed, not raised
        response = yield self.browser.fetch(request, raise_error=False)

        if response.code in (MOVED_PERMANENTLY, FOUND):
            location = response.headers.get('Location', '')
            # TODO; check if Location: header MATCHES the url, not startswith
            # url: example.org/?x should match Location: example.org/?a=1&x
            # even though location does not start with url.
            if location.startswith(url):
                # Replace the proxied url prefix (the variable, not the text 'url')
                response.headers['Location'] = location.replace(url, self.conf.pattern, 1)

        if 'modify' in self.info:
            self.info['modify'](handler=self, request=request, response=response)

        # Pass on the headers as-is, but override with the handler HTTP headers
        self.set_header('X-Proxy-Url', response.effective_url)
        for header_name, header_value in response.headers.items():
            if header_name not in self._skip_response_headers:
                self.set_header(header_name, header_value)
        # Proxies may send the wrong Content-Length. Correct it, else Tornado raises an error
        if response.body is not None:
            self.set_header('Content-Length', len(response.body))
        for header_name, header_value in self.headers.items():
            self.set_header(header_name, header_value)
        # Pass on HTTP status code and response body as-is
        self.set_status(response.code, reason=response.reason)
        if response.body is not None:
            self.write(response.body)

145 self.write(response.body)