calibre-web/lib/tornado/httpserver.py

529 lines
20 KiB
Python

#!/usr/bin/env python
#
# Copyright 2009 Facebook
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""A non-blocking, single-threaded HTTP server.
Typical applications have little direct interaction with the `HTTPServer`
class except to start a server at the beginning of the process
(and even that is often done indirectly via `tornado.web.Application.listen`).
This module also defines the `HTTPRequest` class which is exposed via
`tornado.web.RequestHandler.request`.
"""
from __future__ import absolute_import, division, print_function, with_statement
import socket
import ssl
import time
from tornado.escape import native_str, parse_qs_bytes
from tornado import httputil
from tornado import iostream
from tornado.log import gen_log
from tornado import netutil
from tornado.tcpserver import TCPServer
from tornado import stack_context
from tornado.util import bytes_type
try:
import Cookie # py2
except ImportError:
import http.cookies as Cookie # py3
class HTTPServer(TCPServer):
r"""A non-blocking, single-threaded HTTP server.
A server is defined by a request callback that takes an HTTPRequest
instance as an argument and writes a valid HTTP response with
`HTTPRequest.write`. `HTTPRequest.finish` finishes the request (but does
not necessarily close the connection in the case of HTTP/1.1 keep-alive
requests). A simple example server that echoes back the URI you
requested::
import tornado.httpserver
import tornado.ioloop
def handle_request(request):
message = "You requested %s\n" % request.uri
request.write("HTTP/1.1 200 OK\r\nContent-Length: %d\r\n\r\n%s" % (
len(message), message))
request.finish()
http_server = tornado.httpserver.HTTPServer(handle_request)
http_server.listen(8888)
tornado.ioloop.IOLoop.instance().start()
`HTTPServer` is a very basic connection handler. It parses the request
headers and body, but the request callback is responsible for producing
the response exactly as it will appear on the wire. This affords
maximum flexibility for applications to implement whatever parts
of HTTP responses are required.
`HTTPServer` supports keep-alive connections by default
(automatically for HTTP/1.1, or for HTTP/1.0 when the client
requests ``Connection: keep-alive``). This means that the request
callback must generate a properly-framed response, using either
the ``Content-Length`` header or ``Transfer-Encoding: chunked``.
Applications that are unable to frame their responses properly
should instead return a ``Connection: close`` header in each
response and pass ``no_keep_alive=True`` to the `HTTPServer`
constructor.
If ``xheaders`` is ``True``, we support the
``X-Real-Ip``/``X-Forwarded-For`` and
``X-Scheme``/``X-Forwarded-Proto`` headers, which override the
remote IP and URI scheme/protocol for all requests. These headers
are useful when running Tornado behind a reverse proxy or load
balancer. The ``protocol`` argument can also be set to ``https``
if Tornado is run behind an SSL-decoding proxy that does not set one of
the supported ``xheaders``.
To make this server serve SSL traffic, send the ``ssl_options`` dictionary
argument with the arguments required for the `ssl.wrap_socket` method,
including ``certfile`` and ``keyfile``. (In Python 3.2+ you can pass
an `ssl.SSLContext` object instead of a dict)::
HTTPServer(applicaton, ssl_options={
"certfile": os.path.join(data_dir, "mydomain.crt"),
"keyfile": os.path.join(data_dir, "mydomain.key"),
})
`HTTPServer` initialization follows one of three patterns (the
initialization methods are defined on `tornado.tcpserver.TCPServer`):
1. `~tornado.tcpserver.TCPServer.listen`: simple single-process::
server = HTTPServer(app)
server.listen(8888)
IOLoop.instance().start()
In many cases, `tornado.web.Application.listen` can be used to avoid
the need to explicitly create the `HTTPServer`.
2. `~tornado.tcpserver.TCPServer.bind`/`~tornado.tcpserver.TCPServer.start`:
simple multi-process::
server = HTTPServer(app)
server.bind(8888)
server.start(0) # Forks multiple sub-processes
IOLoop.instance().start()
When using this interface, an `.IOLoop` must *not* be passed
to the `HTTPServer` constructor. `~.TCPServer.start` will always start
the server on the default singleton `.IOLoop`.
3. `~tornado.tcpserver.TCPServer.add_sockets`: advanced multi-process::
sockets = tornado.netutil.bind_sockets(8888)
tornado.process.fork_processes(0)
server = HTTPServer(app)
server.add_sockets(sockets)
IOLoop.instance().start()
The `~.TCPServer.add_sockets` interface is more complicated,
but it can be used with `tornado.process.fork_processes` to
give you more flexibility in when the fork happens.
`~.TCPServer.add_sockets` can also be used in single-process
servers if you want to create your listening sockets in some
way other than `tornado.netutil.bind_sockets`.
"""
def __init__(self, request_callback, no_keep_alive=False, io_loop=None,
xheaders=False, ssl_options=None, protocol=None, **kwargs):
self.request_callback = request_callback
self.no_keep_alive = no_keep_alive
self.xheaders = xheaders
self.protocol = protocol
TCPServer.__init__(self, io_loop=io_loop, ssl_options=ssl_options,
**kwargs)
def handle_stream(self, stream, address):
HTTPConnection(stream, address, self.request_callback,
self.no_keep_alive, self.xheaders, self.protocol)
class _BadRequestException(Exception):
"""Exception class for malformed HTTP requests."""
pass
class HTTPConnection(object):
"""Handles a connection to an HTTP client, executing HTTP requests.
We parse HTTP headers and bodies, and execute the request callback
until the HTTP conection is closed.
"""
def __init__(self, stream, address, request_callback, no_keep_alive=False,
xheaders=False, protocol=None):
self.stream = stream
self.address = address
# Save the socket's address family now so we know how to
# interpret self.address even after the stream is closed
# and its socket attribute replaced with None.
self.address_family = stream.socket.family
self.request_callback = request_callback
self.no_keep_alive = no_keep_alive
self.xheaders = xheaders
self.protocol = protocol
self._clear_request_state()
# Save stack context here, outside of any request. This keeps
# contexts from one request from leaking into the next.
self._header_callback = stack_context.wrap(self._on_headers)
self.stream.set_close_callback(self._on_connection_close)
self.stream.read_until(b"\r\n\r\n", self._header_callback)
def _clear_request_state(self):
"""Clears the per-request state.
This is run in between requests to allow the previous handler
to be garbage collected (and prevent spurious close callbacks),
and when the connection is closed (to break up cycles and
facilitate garbage collection in cpython).
"""
self._request = None
self._request_finished = False
self._write_callback = None
self._close_callback = None
def set_close_callback(self, callback):
"""Sets a callback that will be run when the connection is closed.
Use this instead of accessing
`HTTPConnection.stream.set_close_callback
<.BaseIOStream.set_close_callback>` directly (which was the
recommended approach prior to Tornado 3.0).
"""
self._close_callback = stack_context.wrap(callback)
def _on_connection_close(self):
if self._close_callback is not None:
callback = self._close_callback
self._close_callback = None
callback()
# Delete any unfinished callbacks to break up reference cycles.
self._header_callback = None
self._clear_request_state()
def close(self):
self.stream.close()
# Remove this reference to self, which would otherwise cause a
# cycle and delay garbage collection of this connection.
self._header_callback = None
self._clear_request_state()
def write(self, chunk, callback=None):
"""Writes a chunk of output to the stream."""
if not self.stream.closed():
self._write_callback = stack_context.wrap(callback)
self.stream.write(chunk, self._on_write_complete)
def finish(self):
"""Finishes the request."""
self._request_finished = True
# No more data is coming, so instruct TCP to send any remaining
# data immediately instead of waiting for a full packet or ack.
self.stream.set_nodelay(True)
if not self.stream.writing():
self._finish_request()
def _on_write_complete(self):
if self._write_callback is not None:
callback = self._write_callback
self._write_callback = None
callback()
# _on_write_complete is enqueued on the IOLoop whenever the
# IOStream's write buffer becomes empty, but it's possible for
# another callback that runs on the IOLoop before it to
# simultaneously write more data and finish the request. If
# there is still data in the IOStream, a future
# _on_write_complete will be responsible for calling
# _finish_request.
if self._request_finished and not self.stream.writing():
self._finish_request()
def _finish_request(self):
if self.no_keep_alive or self._request is None:
disconnect = True
else:
connection_header = self._request.headers.get("Connection")
if connection_header is not None:
connection_header = connection_header.lower()
if self._request.supports_http_1_1():
disconnect = connection_header == "close"
elif ("Content-Length" in self._request.headers
or self._request.method in ("HEAD", "GET")):
disconnect = connection_header != "keep-alive"
else:
disconnect = True
self._clear_request_state()
if disconnect:
self.close()
return
try:
# Use a try/except instead of checking stream.closed()
# directly, because in some cases the stream doesn't discover
# that it's closed until you try to read from it.
self.stream.read_until(b"\r\n\r\n", self._header_callback)
# Turn Nagle's algorithm back on, leaving the stream in its
# default state for the next request.
self.stream.set_nodelay(False)
except iostream.StreamClosedError:
self.close()
def _on_headers(self, data):
try:
data = native_str(data.decode('latin1'))
eol = data.find("\r\n")
start_line = data[:eol]
try:
method, uri, version = start_line.split(" ")
except ValueError:
raise _BadRequestException("Malformed HTTP request line")
if not version.startswith("HTTP/"):
raise _BadRequestException("Malformed HTTP version in HTTP Request-Line")
try:
headers = httputil.HTTPHeaders.parse(data[eol:])
except ValueError:
# Probably from split() if there was no ':' in the line
raise _BadRequestException("Malformed HTTP headers")
# HTTPRequest wants an IP, not a full socket address
if self.address_family in (socket.AF_INET, socket.AF_INET6):
remote_ip = self.address[0]
else:
# Unix (or other) socket; fake the remote address
remote_ip = '0.0.0.0'
self._request = HTTPRequest(
connection=self, method=method, uri=uri, version=version,
headers=headers, remote_ip=remote_ip, protocol=self.protocol)
content_length = headers.get("Content-Length")
if content_length:
content_length = int(content_length)
if content_length > self.stream.max_buffer_size:
raise _BadRequestException("Content-Length too long")
if headers.get("Expect") == "100-continue":
self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
self.stream.read_bytes(content_length, self._on_request_body)
return
self.request_callback(self._request)
except _BadRequestException as e:
gen_log.info("Malformed HTTP request from %s: %s",
self.address[0], e)
self.close()
return
def _on_request_body(self, data):
self._request.body = data
if self._request.method in ("POST", "PATCH", "PUT"):
httputil.parse_body_arguments(
self._request.headers.get("Content-Type", ""), data,
self._request.arguments, self._request.files)
self.request_callback(self._request)
class HTTPRequest(object):
"""A single HTTP request.
All attributes are type `str` unless otherwise noted.
.. attribute:: method
HTTP request method, e.g. "GET" or "POST"
.. attribute:: uri
The requested uri.
.. attribute:: path
The path portion of `uri`
.. attribute:: query
The query portion of `uri`
.. attribute:: version
HTTP version specified in request, e.g. "HTTP/1.1"
.. attribute:: headers
`.HTTPHeaders` dictionary-like object for request headers. Acts like
a case-insensitive dictionary with additional methods for repeated
headers.
.. attribute:: body
Request body, if present, as a byte string.
.. attribute:: remote_ip
Client's IP address as a string. If ``HTTPServer.xheaders`` is set,
will pass along the real IP address provided by a load balancer
in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.
.. versionchanged:: 3.1
The list format of ``X-Forwarded-For`` is now supported.
.. attribute:: protocol
The protocol used, either "http" or "https". If ``HTTPServer.xheaders``
is set, will pass along the protocol used by a load balancer if
reported via an ``X-Scheme`` header.
.. attribute:: host
The requested hostname, usually taken from the ``Host`` header.
.. attribute:: arguments
GET/POST arguments are available in the arguments property, which
maps arguments names to lists of values (to support multiple values
for individual names). Names are of type `str`, while arguments
are byte strings. Note that this is different from
`.RequestHandler.get_argument`, which returns argument values as
unicode strings.
.. attribute:: files
File uploads are available in the files property, which maps file
names to lists of `.HTTPFile`.
.. attribute:: connection
An HTTP request is attached to a single HTTP connection, which can
be accessed through the "connection" attribute. Since connections
are typically kept open in HTTP/1.1, multiple requests can be handled
sequentially on a single connection.
"""
def __init__(self, method, uri, version="HTTP/1.0", headers=None,
body=None, remote_ip=None, protocol=None, host=None,
files=None, connection=None):
self.method = method
self.uri = uri
self.version = version
self.headers = headers or httputil.HTTPHeaders()
self.body = body or ""
# set remote IP and protocol
self.remote_ip = remote_ip
if protocol:
self.protocol = protocol
elif connection and isinstance(connection.stream,
iostream.SSLIOStream):
self.protocol = "https"
else:
self.protocol = "http"
# xheaders can override the defaults
if connection and connection.xheaders:
# Squid uses X-Forwarded-For, others use X-Real-Ip
ip = self.headers.get("X-Forwarded-For", self.remote_ip)
ip = ip.split(',')[-1].strip()
ip = self.headers.get(
"X-Real-Ip", ip)
if netutil.is_valid_ip(ip):
self.remote_ip = ip
# AWS uses X-Forwarded-Proto
proto = self.headers.get(
"X-Scheme", self.headers.get("X-Forwarded-Proto", self.protocol))
if proto in ("http", "https"):
self.protocol = proto
self.host = host or self.headers.get("Host") or "127.0.0.1"
self.files = files or {}
self.connection = connection
self._start_time = time.time()
self._finish_time = None
self.path, sep, self.query = uri.partition('?')
self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
def supports_http_1_1(self):
"""Returns True if this request supports HTTP/1.1 semantics"""
return self.version == "HTTP/1.1"
@property
def cookies(self):
"""A dictionary of Cookie.Morsel objects."""
if not hasattr(self, "_cookies"):
self._cookies = Cookie.SimpleCookie()
if "Cookie" in self.headers:
try:
self._cookies.load(
native_str(self.headers["Cookie"]))
except Exception:
self._cookies = {}
return self._cookies
def write(self, chunk, callback=None):
"""Writes the given chunk to the response stream."""
assert isinstance(chunk, bytes_type)
self.connection.write(chunk, callback=callback)
def finish(self):
"""Finishes this HTTP request on the open connection."""
self.connection.finish()
self._finish_time = time.time()
def full_url(self):
"""Reconstructs the full URL for this request."""
return self.protocol + "://" + self.host + self.uri
def request_time(self):
"""Returns the amount of time it took for this request to execute."""
if self._finish_time is None:
return time.time() - self._start_time
else:
return self._finish_time - self._start_time
def get_ssl_certificate(self, binary_form=False):
"""Returns the client's SSL certificate, if any.
To use client certificates, the HTTPServer must have been constructed
with cert_reqs set in ssl_options, e.g.::
server = HTTPServer(app,
ssl_options=dict(
certfile="foo.crt",
keyfile="foo.key",
cert_reqs=ssl.CERT_REQUIRED,
ca_certs="cacert.crt"))
By default, the return value is a dictionary (or None, if no
client certificate is present). If ``binary_form`` is true, a
DER-encoded form of the certificate is returned instead. See
SSLSocket.getpeercert() in the standard library for more
details.
http://docs.python.org/library/ssl.html#sslsocket-objects
"""
try:
return self.connection.stream.socket.getpeercert(
binary_form=binary_form)
except ssl.SSLError:
return None
def __repr__(self):
attrs = ("protocol", "host", "method", "uri", "version", "remote_ip")
args = ", ".join(["%s=%r" % (n, getattr(self, n)) for n in attrs])
return "%s(%s, headers=%s)" % (
self.__class__.__name__, args, dict(self.headers))