TLA Line data Source code
1 : //
2 : // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
3 : // Copyright (c) 2024 Mohammad Nejati
4 : //
5 : // Distributed under the Boost Software License, Version 1.0. (See accompanying
6 : // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 : //
8 : // Official repository: https://github.com/cppalliance/http
9 : //
10 :
11 : #ifndef BOOST_HTTP_PARSER_HPP
12 : #define BOOST_HTTP_PARSER_HPP
13 :
14 : #include <boost/http/config.hpp>
15 : #include <boost/http/detail/header.hpp>
16 : #include <boost/http/detail/type_traits.hpp>
17 : #include <boost/http/error.hpp>
18 :
19 : #include <boost/capy/buffers/buffer_copy.hpp>
20 : #include <boost/capy/buffers/buffer_slice.hpp>
21 : #include <boost/capy/concept/read_stream.hpp>
22 : #include <boost/capy/concept/write_sink.hpp>
23 : #include <boost/capy/cond.hpp>
24 : #include <boost/capy/error.hpp>
25 : #include <boost/capy/io_task.hpp>
26 : #include <boost/core/span.hpp>
27 :
28 : #include <array>
29 : #include <cstddef>
30 : #include <cstdint>
31 : #include <memory>
32 :
33 : namespace boost {
34 : namespace http {
35 :
36 : // Forward declarations for types that the parser class only names
37 : class request_parser; // declared a friend of parser
38 : class response_parser; // declared a friend of parser
39 : class static_request; // returned by reference from parser::safe_get_request
40 : class static_response; // returned by reference from parser::safe_get_response
41 :
42 : //------------------------------------------------
43 :
44 : /** A parser for HTTP/1 messages.
45 :
46 : This parser uses a single block of memory allocated
47 : during construction and guarantees it will never
48 : exceed the specified size. This space is reused for
49 : parsing multiple HTTP messages (one at a time).
50 :
51 : The allocated space is used for:
52 :
53 : @li Buffering raw input from a socket
54 : @li Storing HTTP headers with O(1) access to
55 : method, target, and status code
56 : @li Storing all or part of an HTTP message body
57 : @li Storing state for inflate algorithms
58 :
59 : The parser is strict. Any malformed input according
60 : to the HTTP ABNFs is treated as an unrecoverable
61 : error.
62 :
63 : @see
64 : @ref response_parser,
65 : @ref request_parser.
66 : */
67 : class parser
68 : {
69 : public:
70 : template<capy::ReadStream Stream>
71 : class source; // body source bound to a stream; defined after this class
72 :
73 : /// Buffer type returned from @ref prepare.
74 : using mutable_buffers_type =
75 : boost::span<capy::mutable_buffer const>;
76 :
77 : /// Buffer type returned from @ref pull_body.
78 : using const_buffers_type =
79 : boost::span<capy::const_buffer const>;
80 :
81 : //--------------------------------------------
82 : //
83 : // Observers
84 : //
85 : //--------------------------------------------
86 :
87 : /// Check if a complete header has been parsed.
88 : BOOST_HTTP_DECL
89 : bool
90 : got_header() const noexcept;
91 :
92 : /// Check if a complete message has been parsed.
93 : BOOST_HTTP_DECL
94 : bool
95 : is_complete() const noexcept;
96 :
97 : //--------------------------------------------
98 : //
99 : // Modifiers
100 : //
101 : //--------------------------------------------
102 :
103 : /// Prepare for a new stream.
104 : BOOST_HTTP_DECL
105 : void
106 : reset() noexcept;
107 :
108 : /** Prepare for a new message.
109 :
110 : @par Preconditions
111 : Either this is the first message in the stream,
112 : or the previous message has been fully parsed.
113 : */
114 : BOOST_HTTP_DECL
115 : void
116 : start();
117 :
118 : /** Return a buffer for reading input.
119 :
120 : After writing to the buffer, call @ref commit
121 : with the number of bytes written.
122 :
123 : @par Preconditions
124 : @ref parse returned @ref condition::need_more_input.
125 :
126 : @par Postconditions
127 : A call to @ref commit or @ref commit_eof is
128 : required before calling @ref prepare again.
129 :
130 : @par Exception Safety
131 : Strong guarantee.
132 :
133 : @return A non-empty sequence of mutable buffers.
134 :
135 : @see @ref commit, @ref commit_eof.
136 : */
137 : BOOST_HTTP_DECL
138 : mutable_buffers_type
139 : prepare();
140 :
141 : /** Commit bytes to the input buffer.
142 :
143 : @par Preconditions
144 : @li `n <= capy::buffer_size( this->prepare() )`
145 : @li No prior call to @ref commit or @ref commit_eof
146 : since the last @ref prepare
147 :
148 : @par Postconditions
149 : Buffers from @ref prepare are invalidated.
150 :
151 : @par Exception Safety
152 : Strong guarantee.
153 :
154 : @param n The number of bytes written.
155 :
156 : @see @ref parse, @ref prepare.
157 : */
158 : BOOST_HTTP_DECL
159 : void
160 : commit(
161 : std::size_t n);
162 :
163 : /** Indicate end of input.
164 :
165 : Call this when the underlying stream has closed
166 : and no more data will arrive.
167 :
168 : @par Postconditions
169 : Buffers from @ref prepare are invalidated.
170 :
171 : @par Exception Safety
172 : Strong guarantee.
173 :
174 : @see @ref parse, @ref prepare.
175 : */
176 : BOOST_HTTP_DECL
177 : void
178 : commit_eof();
179 :
180 : /** Parse pending input data.
181 :
182 : Returns immediately after the header is fully
183 : parsed to allow @ref set_body_limit to be called
184 : before body parsing begins. If an error occurs
185 : during body parsing, the parsed header remains
186 : valid and accessible.
187 :
188 : When `ec == condition::need_more_input`, read
189 : more data and call @ref commit before calling
190 : this function again.
191 :
192 : When `ec == error::end_of_stream`, the stream
193 : closed cleanly. Call @ref reset to reuse the
194 : parser for a new stream.
195 :
196 : @param ec Set to the error, if any occurred.
197 :
198 : @see @ref start, @ref prepare, @ref commit.
199 : */
200 : BOOST_HTTP_DECL
201 : void
202 : parse(
203 : system::error_code& ec);
204 :
205 : /** Set maximum body size for the current message.
206 :
207 : Overrides @ref parser_config::body_limit for this
208 : message only. The limit resets to the default
209 : for subsequent messages.
210 :
211 : @par Preconditions
212 : `this->got_header() == true` and body parsing
213 : has not started.
214 :
215 : @par Exception Safety
216 : Strong guarantee.
217 :
218 : @param n The body size limit in bytes.
219 :
220 : @see @ref parser_config::body_limit.
221 : */
222 : BOOST_HTTP_DECL
223 : void
224 : set_body_limit(std::uint64_t n);
225 :
226 : /** Return available body data.
227 :
228 : Use this to incrementally process body data.
229 : Call @ref consume_body after processing to
230 : release the buffer space.
231 :
232 : @par Example
233 : @code
234 : request_parser pr( ctx );
235 : pr.start();
236 : co_await pr.read_header( stream );
237 :
238 : while( ! pr.is_complete() )
239 : {
240 : co_await read_some( stream, pr );
241 : auto cbs = pr.pull_body();
242 : // process cbs ...
243 : pr.consume_body( capy::buffer_size( cbs ) );
244 : }
245 : @endcode
246 :
247 : @par Preconditions
248 : `this->got_header() == true`
249 :
250 : @par Postconditions
251 : The returned buffer is invalidated by any
252 : modifying member function.
253 :
254 : @par Exception Safety
255 : Strong guarantee.
256 :
257 : @return Buffers containing available body data.
258 :
259 : @see @ref consume_body.
260 : */
261 : BOOST_HTTP_DECL
262 : const_buffers_type
263 : pull_body();
264 :
265 : /** Consume bytes from available body data.
266 :
267 : @par Preconditions
268 : `n <= capy::buffer_size( this->pull_body() )`
269 :
270 : @par Exception Safety
271 : Strong guarantee.
272 :
273 : @param n The number of bytes to consume.
274 :
275 : @see @ref pull_body.
276 : */
277 : BOOST_HTTP_DECL
278 : void
279 : consume_body(std::size_t n);
280 :
281 : /** Return the complete body.
282 :
283 : Use this when the entire message fits within
284 : the parser's internal buffer.
285 :
286 : @par Example
287 : @code
288 : request_parser pr( ctx );
289 : pr.start();
290 : co_await pr.read_header( stream );
291 : // ... read entire body ...
292 : core::string_view body = pr.body();
293 : @endcode
294 :
295 : @par Preconditions
296 : @li `this->is_complete() == true`
297 : @li No previous call to @ref consume_body
298 :
299 : @par Exception Safety
300 : Strong guarantee.
301 :
302 : @return A string view of the complete body.
303 :
304 : @see @ref is_complete.
305 : */
306 : BOOST_HTTP_DECL
307 : core::string_view
308 : body() const;
309 :
310 : /** Return true if data is buffered past the message.
311 :
312 : After a complete message, returns true when the
313 : parser's buffer still holds octets that lie
314 : beyond it, such as the start of a pipelined
315 : message or data the peer sent past the message
316 : framing. Returns false before the message is
317 : complete.
318 :
319 : This does not include the message body, which is
320 : retrieved separately via @ref body or
321 : @ref pull_body.
322 :
323 : @return true if octets remain buffered past the
324 : completed message.
325 :
326 : @see @ref is_complete, @ref release_buffered_data.
327 : */
328 : BOOST_HTTP_DECL
329 : bool
330 : has_buffered_data() const noexcept;
331 :
332 : /** Return unconsumed data past the last message.
333 :
334 : Use this after an upgrade or CONNECT request
335 : to retrieve protocol-dependent data that
336 : follows the HTTP message.
337 :
338 : @return A string view of leftover data.
339 :
340 : @see @ref metadata::upgrade, @ref metadata::connection.
341 : */
342 : BOOST_HTTP_DECL
343 : core::string_view
344 : release_buffered_data() noexcept;
345 :
346 : /** Asynchronously read the HTTP headers.
347 :
348 : Reads from the stream until the headers are
349 : complete or an error occurs.
350 :
351 : @par Preconditions
352 : @li @ref reset has been called
353 : @li @ref start has been called
354 :
355 : @param stream The stream to read from.
356 :
357 : @return An awaitable yielding `(error_code)`.
358 :
359 : @see @ref read.
360 : */
361 : template<capy::ReadStream Stream>
362 : capy::io_task<>
363 : read_header(Stream& stream);
364 :
365 : /** Asynchronously read a complete HTTP message.
366 :
367 : Reads from the stream until the message is fully
368 : parsed or an error occurs. The body is accumulated
369 : in the parser's internal buffer and can be retrieved
370 : via @ref body after completion.
371 :
372 : If the parser's internal buffer fills before the
373 : message is complete, the operation completes with
374 : @ref error::in_place_overflow.
375 :
376 : @par Preconditions
377 : @li @ref reset has been called
378 : @li @ref start has been called
379 :
380 : @param stream The stream to read from.
381 :
382 : @return An awaitable yielding `(error_code)`.
383 :
384 : @see @ref body, @ref read_header.
385 : */
386 : template<capy::ReadStream Stream>
387 : capy::io_task<>
388 : read(Stream& stream);
389 :
390 : /** Asynchronously read body data into buffers.
391 :
392 : Reads from the stream and copies body data into
393 : the provided buffers with complete-fill semantics.
394 : Returns `capy::error::eof` when the body is complete.
395 :
396 : @par Preconditions
397 : @li @ref reset has been called
398 : @li @ref start has been called
399 :
400 : @param stream The stream to read from.
401 :
402 : @param buffers The buffers to read into. If empty, the operation completes immediately with zero bytes.
403 :
404 : @return An awaitable yielding `(error_code,std::size_t)`; the size is the number of bytes copied into `buffers`.
405 :
406 : @see @ref read_header.
407 : */
408 : template<capy::ReadStream Stream, capy::MutableBufferSequence MB>
409 : capy::io_task<std::size_t>
410 : read(Stream& stream, MB buffers);
411 :
412 : /** Return a source for reading body data.
413 :
414 : The returned source satisfies @ref capy::BufferSource.
415 : On first pull, headers are automatically parsed if
416 : not yet received.
417 :
418 : @par Example
419 : @code
420 : request_parser pr( ctx );
421 : pr.start();
422 : auto body = pr.source_for( socket );
423 :
424 : capy::const_buffer arr[16];
425 : auto [ec, bufs] = co_await body.pull( arr );
426 : body.consume( capy::buffer_size( bufs ) );
427 : @endcode
428 :
429 : @param stream The stream to read from.
430 :
431 : @return A source satisfying @ref capy::BufferSource.
432 :
433 : @see @ref read_header, @ref capy::BufferSource.
434 : */
435 : template<capy::ReadStream Stream>
436 : source<Stream>
437 : source_for(Stream& stream) noexcept;
438 :
439 : /** Read body from stream and push to a WriteSink.
440 :
441 : Reads body data from the stream and pushes each chunk to
442 : the sink. The sink must consume all bytes from each write.
443 :
444 : @param stream The stream to read body data from.
445 :
446 : @param sink The sink to receive body data.
447 :
448 : @return An awaitable yielding `(error_code)`; once the message is complete this is the result of `sink.write_eof()`.
449 :
450 : @see @ref capy::WriteSink.
451 : */
452 : template<capy::WriteSink Sink>
453 : capy::io_task<>
454 : read(capy::ReadStream auto& stream, Sink&& sink);
455 :
456 : private: // constructors and destructor are private; request_parser and response_parser are friends
457 : friend class request_parser;
458 : friend class response_parser;
459 : class impl; // declared here only; the definition is not in this listing
460 :
461 : BOOST_HTTP_DECL ~parser();
462 : BOOST_HTTP_DECL parser() noexcept;
463 : BOOST_HTTP_DECL parser(parser&& other) noexcept;
464 : BOOST_HTTP_DECL parser(
465 : std::shared_ptr<parser_config_impl const> cfg,
466 : detail::kind k);
467 : BOOST_HTTP_DECL void assign(parser&& other) noexcept;
468 :
469 : BOOST_HTTP_DECL
470 : void
471 : start_impl(bool); // NOTE(review): the bool parameter is unnamed and undocumented here
472 :
473 : static_request const&
474 : safe_get_request() const;
475 :
476 : static_response const&
477 : safe_get_response() const;
478 :
479 : impl* impl_; // pointer to the impl object; NOTE(review): ownership is not visible in this header
480 : };
481 :
482 : /** A source for reading the message body.
483 :
484 : This type satisfies @ref capy::BufferSource. It can be
485 : constructed immediately after parser construction; on
486 : first pull, headers are automatically parsed if not
487 : yet received.
488 :
489 : @tparam Stream A type satisfying @ref capy::ReadStream.
490 :
491 : @see @ref parser::source_for.
492 : */
493 : template<capy::ReadStream Stream>
494 : class parser::source
495 : {
496 : Stream* stream_; // stream that pull() reads from
497 : parser* pr_; // parser whose body data pull() exposes
498 :
499 : public:
500 : /// Default constructor; leaves the stream and parser pointers null.
501 : source() noexcept
502 : : stream_(nullptr)
503 : , pr_(nullptr)
504 : {
505 : }
506 :
507 : /// Construct a source that reads from `stream` through `pr`; both are held by pointer.
508 HIT 355 : source(Stream& stream, parser& pr) noexcept
509 355 : : stream_(&stream)
510 355 : , pr_(&pr)
511 : {
512 355 : }
513 :
514 : /** Pull buffer data from the body.
515 :
516 : Reads the headers first if they have not been parsed yet.
517 : Returns buffer descriptors pointing to internal parser
518 : memory. When the body is complete, yields `capy::error::eof` and an empty span.
519 :
520 : @param dest Span of const_buffer to fill; at most `dest.size()` descriptors are written.
521 :
522 : @return An awaitable yielding `(error_code,std::span<const_buffer>)`.
523 : */
524 : capy::io_task<std::span<capy::const_buffer>>
525 : pull(std::span<capy::const_buffer> dest);
526 :
527 : /** Consume bytes from pulled body data.
528 :
529 : Forwards to @ref parser::consume_body, advancing the read
530 : position by `n` bytes. The next pull returns data starting
531 : after the consumed bytes.
532 :
533 : @param n The number of bytes to consume.
534 : */
535 : void
536 : consume(std::size_t n) noexcept;
537 : };
538 :
539 : template<capy::ReadStream Stream>
540 : capy::io_task<>
541 749 : parser::
542 : read_header(Stream& stream) // coroutine: feeds the parser from stream until got_header() is true
543 : {
544 : system::error_code ec;
545 : for(;;)
546 : {
547 : parse(ec); // parse the input committed so far
548 :
549 : if(got_header())
550 : co_return {}; // success is reported here even if parse() also set ec
551 :
552 : if(ec != condition::need_more_input)
553 : co_return {std::error_code(ec)}; // any other outcome ends the read
554 :
555 : auto mbs = prepare();
556 :
557 : auto [read_ec, n] = co_await stream.read_some(mbs);
558 : if(read_ec == capy::cond::eof)
559 : commit_eof(); // no co_return here: the next parse() call decides the result
560 : else if(!read_ec)
561 : commit(n);
562 : else
563 : co_return {read_ec}; // stream errors other than EOF are returned unchanged
564 : }
565 1498 : }
566 :
567 : template<capy::ReadStream Stream>
568 : capy::io_task<>
569 274 : parser::
570 : read(Stream& stream) // coroutine: feeds the parser from stream until is_complete() is true
571 : {
572 : system::error_code ec;
573 : for(;;)
574 : {
575 : parse(ec); // parse the input committed so far
576 :
577 : if(is_complete())
578 : co_return {}; // whole message parsed
579 :
580 : if(ec && ec != condition::need_more_input)
581 : co_return {std::error_code(ec)}; // parse failed with something other than need_more_input
582 :
583 : if(ec == condition::need_more_input) // otherwise ec is clear and the loop just calls parse() again
584 : {
585 : auto mbs = prepare();
586 :
587 : auto [read_ec, n] = co_await stream.read_some(mbs);
588 : if(read_ec == capy::cond::eof)
589 : commit_eof(); // no co_return here: the next parse() call decides the result
590 : else if(!read_ec)
591 : commit(n);
592 : else
593 : co_return {read_ec}; // stream errors other than EOF are returned unchanged
594 : }
595 : }
596 548 : }
597 :
598 : template<capy::ReadStream Stream, capy::MutableBufferSequence MB>
599 : capy::io_task<std::size_t>
600 : parser::
601 : read(Stream& stream, MB buffers) // coroutine: copies body bytes into buffers until they are full or the body ends
602 : {
603 : if(capy::buffer_empty(buffers))
604 : co_return {{}, 0}; // nothing to fill: complete without touching the parser or the stream
605 :
606 : std::size_t total = 0; // bytes copied into buffers so far
607 : auto dest = capy::buffer_slice(buffers); // view over buffers; shrunk with remove_prefix as bytes are copied
608 :
609 : for(;;)
610 : {
611 : system::error_code ec;
612 : parse(ec);
613 :
614 : if(got_header()) // pull_body is only called once the header is available
615 : {
616 : auto body_data = pull_body();
617 : if(capy::buffer_size(body_data) > 0)
618 : {
619 : std::size_t copied = capy::buffer_copy(dest.data(), body_data);
620 : consume_body(copied); // release exactly what was copied out
621 : total += copied;
622 : dest.remove_prefix(copied);
623 :
624 : if(capy::buffer_empty(dest.data()))
625 : co_return {{}, total}; // destination is full
626 : }
627 :
628 : if(is_complete())
629 : co_return {capy::error::eof, total}; // body finished: eof together with the bytes copied in this call
630 : }
631 :
632 : if(ec == condition::need_more_input)
633 : {
634 : auto mbs = prepare();
635 : auto [read_ec, n] = co_await stream.read_some(mbs);
636 :
637 : if(read_ec == capy::cond::eof)
638 : commit_eof(); // no co_return here: the next parse() call decides the result
639 : else if(!read_ec)
640 : commit(n);
641 : else
642 : co_return {read_ec, total}; // stream error; bytes already copied are still reported
643 :
644 : continue; // parse the new input before looking at ec again
645 : }
646 :
647 : if(ec)
648 : co_return {ec, total}; // parse error; bytes already copied are still reported
649 : }
650 : }
651 :
652 : template<capy::ReadStream Stream>
653 : parser::source<Stream>
654 355 : parser::
655 : source_for(Stream& stream) noexcept // builds a body source bound to stream and to this parser
656 : {
657 355 : return source<Stream>(stream, *this); // stores both addresses; NOTE(review): presumably both must outlive the source
658 : }
659 :
660 : template<capy::ReadStream Stream>
661 : capy::io_task<std::span<capy::const_buffer>> // NOTE(review): std::span is used but <span> is not included directly by this header
662 922 : parser::source<Stream>::
663 : pull(std::span<capy::const_buffer> dest) // coroutine: yields descriptors of available body data, reading more input as needed
664 : {
665 : // Read headers if not yet parsed (checked on every call, not only the first)
666 : if(!pr_->got_header())
667 : {
668 : auto [ec] = co_await pr_->read_header(*stream_);
669 : if(ec)
670 : co_return {ec, {}}; // header read failed
671 : }
672 :
673 : for(;;)
674 : {
675 : system::error_code ec;
676 : pr_->parse(ec);
677 :
678 : auto body_data = pr_->pull_body();
679 : if(capy::buffer_size(body_data) > 0)
680 : {
681 : std::size_t count = (std::min)(body_data.size(), dest.size()); // NOTE(review): std::min needs <algorithm>, not included directly here
682 : for(std::size_t i = 0; i < count; ++i) // count is a number of buffer descriptors, not bytes
683 : dest[i] = body_data[i];
684 : co_return {{}, dest.first(count)}; // NOTE(review): with an empty dest this is success with an empty span
685 : }
686 :
687 : if(pr_->is_complete())
688 : co_return {capy::error::eof, {}}; // no body data left and the message is complete
689 :
690 : if(ec == condition::need_more_input)
691 : {
692 : auto mbs = pr_->prepare();
693 : auto [read_ec, n] = co_await stream_->read_some(mbs);
694 :
695 : if(read_ec == capy::cond::eof)
696 : pr_->commit_eof(); // no co_return here: the next parse() call decides the result
697 : else if(!read_ec)
698 : pr_->commit(n);
699 : else
700 : co_return {read_ec, {}}; // stream errors other than EOF are returned unchanged
701 :
702 : continue; // parse the new input before looking at ec again
703 : }
704 :
705 : if(ec)
706 : co_return {ec, {}}; // parse error
707 : }
708 1844 : }
709 :
710 : template<capy::ReadStream Stream>
711 : void
712 568 : parser::source<Stream>::
713 : consume(std::size_t n) noexcept // marks n bytes of pulled body data as consumed
714 : {
715 568 : pr_->consume_body(n); // forwards to the parser; pr_ is dereferenced without a null check
716 568 : }
717 :
718 : template<capy::WriteSink Sink>
719 : capy::io_task<>
720 138 : parser::
721 : read(capy::ReadStream auto& stream, Sink&& sink) // coroutine: streams the body from stream into sink, then signals EOF to the sink
722 : {
723 : for(;;)
724 : {
725 : system::error_code ec;
726 : parse(ec);
727 :
728 : if(got_header()) // pull_body is only called once the header is available
729 : {
730 : auto body_data = pull_body();
731 : if(capy::buffer_size(body_data) > 0)
732 : {
733 : auto [write_ec, n] = co_await sink.write(body_data);
734 : if(write_ec)
735 : co_return {write_ec}; // sink failure ends the transfer; nothing is consumed for this chunk
736 : consume_body(n); // release only the byte count the sink reported
737 : }
738 :
739 : if(is_complete())
740 : {
741 : auto [eof_ec] = co_await sink.write_eof();
742 : co_return {eof_ec}; // final result is whatever write_eof reports
743 : }
744 : }
745 :
746 : if(ec == condition::need_more_input)
747 : {
748 : auto mbs = prepare();
749 : auto [read_ec, n] = co_await stream.read_some(mbs);
750 :
751 : if(read_ec == capy::cond::eof)
752 : commit_eof(); // no co_return here: the next parse() call decides the result
753 : else if(!read_ec)
754 : commit(n);
755 : else
756 : co_return {read_ec}; // stream errors other than EOF are returned unchanged
757 :
758 : continue; // parse the new input before looking at ec again
759 : }
760 :
761 : if(ec)
762 : co_return {std::error_code(ec)}; // parse error; write_eof is not called on this path
763 : }
764 276 : }
765 :
766 : } // http
767 : } // boost
768 :
769 : #endif
|