代码读起来容易,写起来容易掉头发。
HTTP请求头的解析来自boost自带的一个example;把body解析成JSON键值对的部分是我加的(暂不支持嵌套JSON)。
.hpp文件
//
// Created by sxuer on 2021/5/5.
//
#ifndef payhttp_REQUESTPARSER_HPP
#define payhttp_REQUESTPARSER_HPP
#include <tuple>
#include <boost/algorithm/string.hpp>
#include "Request.hpp"
#include <iostream>
namespace payhttp {
/// Incremental, character-driven parser that fills a Request from raw bytes.
/// The header part follows a classic HTTP/1.x state machine; an optional flat
/// (non-nested) JSON object body is recognised right after the header block.
class RequestParser {
public:
    /// Creates a parser positioned at the start of a request line.
    RequestParser();

    /// Puts the state machine back to the start of a request line.
    void reset();

    /// Outcome of feeding input to the parser:
    /// good = finished, bad = malformed input, indeterminate = more input needed.
    enum resultType {
        good, bad, indeterminate
    };

    /// Post-processing of an already parsed request (URI query parameters etc.).
    void parseParam(Request& req, std::string& data_);

    /**
     * Feeds the characters in [begin, end) to the state machine one by one and
     * fills `req` along the way.
     *
     * @return the parse result together with the iterator pointing at the first
     *         character that has NOT been consumed.
     *
     * When the header block has just been completed and the next available
     * character is '{', that brace is consumed and parsing continues with the
     * JSON body. Otherwise the call returns `good` immediately and leaves the
     * following character untouched.
     */
    template<typename InputIterator>
    std::tuple<resultType, InputIterator> parse(Request& req,
                                                InputIterator begin, InputIterator end) {
        while (begin != end) {
            const resultType result = consume(req, *begin++);
            if (result == bad) {
                return std::make_tuple(result, begin);
            }
            if (result == good) {
                // consume() leaves state_ at expecting_newline_3 when the `good`
                // comes from the blank line ending the headers; a `good` produced
                // by the JSON body leaves a json_* state. Only in the former case
                // may a body follow. Never dereference `begin` once it equals
                // `end`, and only consume the next character if it really is the
                // opening brace of a body.
                if (state_ == expecting_newline_3 && begin != end && *begin == '{') {
                    ++begin;
                    state_ = json_parse_start;
                    continue;
                }
                return std::make_tuple(result, begin);
            }
        }
        return std::make_tuple(indeterminate, begin);
    }

private:
    /// Advances the state machine by one character, updating `req`.
    resultType consume(Request& req, char input);

    /// True for 7-bit ASCII values (0..127).
    static bool isChar(int c);

    /// True for the decimal digits '0'..'9'.
    static bool isDigit(int c);

    /// True for control characters: 0..31 and 127.
    static bool isCtl(int c);

    /// True for separator characters such as < > { } ( ) and friends.
    static bool isTspecial(int c);

    /// Parser states.
    enum state {
        method_start,
        method,
        uri,
        http_version_h,
        http_version_t_1,
        http_version_t_2,
        http_version_p,
        http_version_slash,
        http_version_major_start,
        http_version_major,
        http_version_minor_start,
        http_version_minor,
        expecting_newline_1,
        header_line_start,
        header_lws,
        header_name,
        space_before_header_value,
        header_value,
        expecting_newline_2,
        expecting_newline_3,
        // Example body:
        // {\n \"name\":123,\n \"李四\":\"李四\",\n \"1\":null,\n \"wx\":12.8\n}
        json_parse_start,       // entered right after the opening '{'
        json_key_left_dot,      // opening quote of a key
        json_key_right_dot,     // closing quote of a key
        json_key,
        json_value_start,
        json_value,
        json_value_left_dot,    // opening quote of a value
        json_value_right_dot,   // closing quote of a value
        json_check_end,         // value finished; decide whether the object ends
        json_comma,             // comma
        json_colon,             // colon
        json_save               // save one key/value pair
    } state_;
};
}
#endif //payhttp_REQUESTPARSER_HPP
.cpp文件
#include <tuple>
#include <boost/algorithm/string.hpp>
#include "server/include/RequestParser.hpp"
namespace payhttp {
// 默认构造状态:method_start
RequestParser::RequestParser()
: state_(method_start) {
}
// 重置状态
void RequestParser::reset() {
state_ = method_start;
}
// State machine step: consumes exactly one character and updates `req`.
//
// Returns
//   - indeterminate: the character was accepted, more input is required;
//   - good:          the header block (blank line) or the JSON body ('}') ended;
//   - bad:           the character is not allowed in the current state.
//
// Header states follow the usual HTTP/1.x request grammar. The json_* states
// scan ONE flat JSON object into req.body_ as raw text key/value pairs:
//   - whitespace (space, tab, CR, LF) is allowed between tokens, so both
//     pretty-printed and compact bodies are accepted;
//   - characters inside quoted keys/values are stored verbatim, including
//     spaces;
//   - bare values (numbers, true/false/null) end at ',', '}' or whitespace.
// Limitations: nested objects and backslash escapes inside strings are not
// interpreted.
RequestParser::resultType RequestParser::consume(Request& req, char input) {
    // Whitespace that may separate JSON tokens.
    const auto isJsonSpace = [](char c) {
        return c == ' ' || c == '\t' || c == '\r' || c == '\n';
    };
    // Moves the pair collected in key_/value_ into body_ and clears both
    // buffers for the next pair.
    const auto storePair = [&req]() {
        req.body_.insert(make_pair(req.key_, req.value_));
        req.key_.clear();
        req.value_.clear();
    };

    switch (state_) {
        // ---------------------------------------------------------------
        // Request line
        // ---------------------------------------------------------------
        case method_start:
            // The first method character must be a plain token character.
            if (!isChar(input) || isCtl(input) || isTspecial(input)) {
                return bad;
            }
            state_ = method;
            req.method_.push_back(input);
            return indeterminate;

        case method:
            // A single space ends the method and starts the URI.
            if (input == ' ') {
                state_ = uri;
                return indeterminate;
            }
            if (!isChar(input) || isCtl(input) || isTspecial(input)) {
                return bad;
            }
            req.method_.push_back(input);
            return indeterminate;

        case uri:
            // A single space ends the URI and starts the HTTP version.
            if (input == ' ') {
                state_ = http_version_h;
                return indeterminate;
            }
            if (isCtl(input)) {
                return bad;
            }
            req.uri_.push_back(input);
            return indeterminate;

        case http_version_h:
            if (input != 'H') {
                return bad;
            }
            state_ = http_version_t_1;
            return indeterminate;

        case http_version_t_1:
            if (input != 'T') {
                return bad;
            }
            state_ = http_version_t_2;
            return indeterminate;

        case http_version_t_2:
            if (input != 'T') {
                return bad;
            }
            state_ = http_version_p;
            return indeterminate;

        case http_version_p:
            if (input != 'P') {
                return bad;
            }
            state_ = http_version_slash;
            return indeterminate;

        case http_version_slash:
            if (input != '/') {
                return bad;
            }
            // Start accumulating both version numbers from zero.
            req.httpVersionMajor_ = 0;
            req.httpVersionMinor_ = 0;
            state_ = http_version_major_start;
            return indeterminate;

        case http_version_major_start:
            // At least one digit is required.
            if (!isDigit(input)) {
                return bad;
            }
            req.httpVersionMajor_ = req.httpVersionMajor_ * 10 + input - '0';
            state_ = http_version_major;
            return indeterminate;

        case http_version_major:
            if (input == '.') {
                state_ = http_version_minor_start;
                return indeterminate;
            }
            if (!isDigit(input)) {
                return bad;
            }
            req.httpVersionMajor_ = req.httpVersionMajor_ * 10 + input - '0';
            return indeterminate;

        case http_version_minor_start:
            // At least one digit is required.
            if (!isDigit(input)) {
                return bad;
            }
            req.httpVersionMinor_ = req.httpVersionMinor_ * 10 + input - '0';
            state_ = http_version_minor;
            return indeterminate;

        case http_version_minor:
            if (input == '\r') {
                state_ = expecting_newline_1;
                return indeterminate;
            }
            if (!isDigit(input)) {
                return bad;
            }
            req.httpVersionMinor_ = req.httpVersionMinor_ * 10 + input - '0';
            return indeterminate;

        case expecting_newline_1:
            if (input != '\n') {
                return bad;
            }
            state_ = header_line_start;
            return indeterminate;

        // ---------------------------------------------------------------
        // Header lines
        // ---------------------------------------------------------------
        case header_line_start:
            if (input == '\r') {
                // Blank line: the header block is about to end.
                state_ = expecting_newline_3;
                return indeterminate;
            }
            if (!req.headers_.empty() && (input == ' ' || input == '\t')) {
                // Leading whitespace continues the previous header's value.
                state_ = header_lws;
                return indeterminate;
            }
            if (!isChar(input) || isCtl(input) || isTspecial(input)) {
                return bad;
            }
            req.headers_.emplace_back(Header());
            req.headers_.back().name_.push_back(input);
            state_ = header_name;
            return indeterminate;

        case header_lws:
            if (input == '\r') {
                state_ = expecting_newline_2;
                return indeterminate;
            }
            if (input == ' ' || input == '\t') {
                return indeterminate;
            }
            if (isCtl(input)) {
                return bad;
            }
            state_ = header_value;
            req.headers_.back().value_.push_back(input);
            return indeterminate;

        case header_name:
            if (input == ':') {
                state_ = space_before_header_value;
                return indeterminate;
            }
            if (!isChar(input) || isCtl(input) || isTspecial(input)) {
                return bad;
            }
            req.headers_.back().name_.push_back(input);
            return indeterminate;

        case space_before_header_value:
            // Exactly one space is required after the colon.
            if (input != ' ') {
                return bad;
            }
            state_ = header_value;
            return indeterminate;

        case header_value:
            if (input == '\r') {
                state_ = expecting_newline_2;
                return indeterminate;
            }
            if (isCtl(input)) {
                return bad;
            }
            req.headers_.back().value_.push_back(input);
            return indeterminate;

        case expecting_newline_2:
            if (input != '\n') {
                return bad;
            }
            state_ = header_line_start;
            return indeterminate;

        case expecting_newline_3:
            // state_ is intentionally left unchanged here.
            return (input == '\n') ? good : bad;

        // ---------------------------------------------------------------
        // JSON body (flat object). The opening '{' has already been consumed
        // by the caller before json_parse_start is entered.
        // ---------------------------------------------------------------
        case json_parse_start:
            // Right after '{': first key, or '}' for an empty object.
            if (isJsonSpace(input)) {
                return indeterminate;
            }
            if (input == '"') {
                state_ = json_key;
                return indeterminate;
            }
            if (input == '}') {
                return good;
            }
            return bad;

        case json_comma:
        case json_key_left_dot:
            // After a ',': another key must follow (no trailing comma).
            if (isJsonSpace(input)) {
                return indeterminate;
            }
            if (input == '"') {
                state_ = json_key;
                return indeterminate;
            }
            return bad;

        case json_key:
            // Inside a quoted key: everything up to the closing quote is kept.
            if (input == '"') {
                state_ = json_key_right_dot;
                return indeterminate;
            }
            req.key_.push_back(input);
            return indeterminate;

        case json_key_right_dot:
            // Key closed: only whitespace and the ':' separator are allowed.
            if (isJsonSpace(input)) {
                return indeterminate;
            }
            if (input == ':') {
                state_ = json_value_start;
                return indeterminate;
            }
            return bad;

        case json_colon:
        case json_value_start:
            // After ':': a quoted string or a bare value begins.
            if (isJsonSpace(input)) {
                return indeterminate;
            }
            if (input == '"') {
                state_ = json_value_left_dot;
                return indeterminate;
            }
            if (input == ',' || input == '}' || input == ':') {
                return bad;  // the value is missing
            }
            state_ = json_value;
            req.value_.push_back(input);
            return indeterminate;

        case json_value_left_dot:
            // Inside a quoted value: everything up to the closing quote is kept,
            // so an empty string ("") is valid too.
            if (input == '"') {
                state_ = json_value_right_dot;
                return indeterminate;
            }
            req.value_.push_back(input);
            return indeterminate;

        case json_value:
            // Inside a bare value (number / true / false / null).
            if (input == ',') {
                storePair();
                state_ = json_key_left_dot;
                return indeterminate;
            }
            if (input == '}') {
                storePair();
                return good;
            }
            if (isJsonSpace(input)) {
                // Whitespace terminates the value; ',' or '}' must follow.
                state_ = json_check_end;
                return indeterminate;
            }
            if (input == '"') {
                return bad;  // a quote cannot appear inside a bare value
            }
            req.value_.push_back(input);
            return indeterminate;

        case json_value_right_dot:
        case json_check_end:
            // Value complete: expect ',' (next pair) or '}' (end of object).
            if (isJsonSpace(input)) {
                return indeterminate;
            }
            if (input == ',') {
                storePair();
                state_ = json_key_left_dot;
                return indeterminate;
            }
            if (input == '}') {
                storePair();
                return good;
            }
            return bad;

        default:
            return bad;
    }
}
// Derives secondary fields of an already parsed request.
//
//  - req.shortUri_  : the URI without its query string (everything before '?').
//  - req.uriParams_ : one entry per "name=value" item of the query string.
//                     Items are separated by '&'; each item is split at its
//                     FIRST '=' so values that themselves contain '=' are kept
//                     intact. Items without '=' are ignored. Names and values
//                     are stored as-is (no percent-decoding).
//
// `data_` is currently not used; form-body handling is not implemented yet.
void RequestParser::parseParam(Request& req, std::string& data_) {
    // --- URI query parameters ---
    const std::string::size_type query_pos = req.uri_.find('?');
    if (query_pos == std::string::npos) {
        // No query string: the short URI is the whole URI.
        req.shortUri_ = req.uri_;
    } else {
        req.shortUri_ = req.uri_.substr(0, query_pos);

        std::string query = req.uri_.substr(query_pos + 1);
        std::vector<std::string> items;
        boost::split(items, query, boost::is_any_of("&"));
        for (const auto& item : items) {
            const std::string::size_type eq_pos = item.find('=');
            if (eq_pos == std::string::npos) {
                continue;  // not a name=value item
            }
            req.uriParams_.insert(make_pair(item.substr(0, eq_pos),
                                            item.substr(eq_pos + 1)));
        }
    }

    // --- Content type of the body ---
    // Header names are matched case-insensitively; the first match wins.
    std::string content_type;
    for (const auto& header : req.headers_) {
        if (boost::algorithm::iequals(header.name_, "content-type")) {
            content_type = header.value_;
            break;
        }
    }
    // Drop parameters such as "; charset=utf-8".
    const std::string::size_type semicolon_pos = content_type.find(';');
    if (semicolon_pos != std::string::npos) {
        content_type.erase(semicolon_pos);
    }
    // TODO: use content_type and data_ to parse form bodies; nothing consumes
    // them yet.
    static_cast<void>(content_type);
    static_cast<void>(data_);
}
// True when c lies in the 7-bit ASCII range [0, 127].
bool RequestParser::isChar(int c) {
    if (c < 0) {
        return false;
    }
    return c <= 127;
}
// True for control characters: the values 0..31 and 127.
bool RequestParser::isCtl(int c) {
    if (c == 127) {
        return true;
    }
    return 0 <= c && c <= 31;
}
// True when c is one of the separator characters that may not appear inside a
// token: ( ) < > @ , ; : \ " / [ ] ? = { } space and horizontal tab.
bool RequestParser::isTspecial(int c) {
    static const char kSeparators[] = {
        '(', ')', '<', '>', '@', ',', ';', ':', '\\', '"',
        '/', '[', ']', '?', '=', '{', '}', ' ', '\t'
    };
    for (const char separator : kSeparators) {
        if (c == separator) {
            return true;
        }
    }
    return false;
}
// True when c is one of the decimal digit characters '0'..'9'.
bool RequestParser::isDigit(int c) {
    if (c < '0') {
        return false;
    }
    return c <= '9';
}
}