前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >有限状态自动机 解析HTTP请求头与body中的json

有限状态自动机 解析HTTP请求头与body中的json

作者头像
devi
发布2021-08-18 10:11:14
1.3K0
发布2021-08-18 10:11:14
举报
文章被收录于专栏:搬砖记录

代码读起来容易,写起来容易掉头发。

HTTP请求头的解析逻辑来自boost自带的一个example,将body解析为JSON字符串的部分是我自己加的(暂不支持嵌套JSON)。

.hpp文件

代码语言:cpp
复制
//
// Created by sxuer on 2021/5/5.
//

#ifndef payhttp_REQUESTPARSER_HPP
#define payhttp_REQUESTPARSER_HPP


#include <tuple>
#include <boost/algorithm/string.hpp>
#include "Request.hpp"
#include <iostream>

namespace payhttp {
    /**
     * Incremental, character-driven parser for an HTTP request.
     *
     * The request line and headers are parsed by a finite state machine
     * (consume()). When the header section is immediately followed by a '{',
     * parse() switches the machine to the JSON states so that a flat
     * (non-nested) JSON object in the body is parsed as well.
     */
    class RequestParser {
    public:
        /// Creates a parser whose state is the start of the request line.
        RequestParser();

        /// Resets the parser state to the start of the request line.
        void reset();

        /// Parser verdict: good (a section finished), bad (invalid input),
        /// indeterminate (more input is required).
        enum resultType {
            good, bad, indeterminate
        };

        void parseParam(Request& req, std::string& data_);

        /**
         * Feeds the characters in [begin, end) to the state machine one at a
         * time, filling in req as it goes.
         *
         * @param req   request object being populated
         * @param begin first character to consume
         * @param end   one past the last available character
         * @return the verdict together with an iterator to the first character
         *         that has NOT been consumed yet.
         *
         * The first `good` verdict may only mean that the header section is
         * complete. If the next available character is '{', it is consumed and
         * parsing continues in the JSON states; otherwise `good` is returned
         * and the iterator is left untouched.
         */
        template<typename InputIterator>
        std::tuple<resultType, InputIterator> parse(Request& req,
                                                    InputIterator begin, InputIterator end) {
            while (begin != end) {
                resultType result = consume(req, *begin++);
                if (result == bad) {
                    return std::make_tuple(result, begin);
                }
                if (result == good) {
                    // Peek at the next character instead of blindly reading it:
                    // `begin` may already equal `end` here (dereferencing it
                    // would be undefined behaviour), and a character that does
                    // not open a JSON body must stay unconsumed for the caller.
                    if (begin != end && *begin == '{') {
                        ++begin;
                        state_ = json_parse_start;
                        continue;
                    }
                    return std::make_tuple(result, begin);
                }
            }
            return std::make_tuple(indeterminate, begin);
        }

    private:
        /// Advances the state machine by one input character, updating req.
        resultType consume(Request& req, char input);

        /// True when c is in the range [0, 127].
        static bool isChar(int c);

        /// True when c is in the range ['0', '9'].
        static bool isDigit(int c);

        /// True when c is in [0, 31] or equals 127.
        static bool isCtl(int c);

        /// True for separator characters such as < > { } and similar.
        static bool isTspecial(int c);

        /// Parser states.
        enum state {
            method_start,
            method,
            uri,
            http_version_h,
            http_version_t_1,
            http_version_t_2,
            http_version_p,
            http_version_slash,
            http_version_major_start,
            http_version_major,
            http_version_minor_start,
            http_version_minor,
            expecting_newline_1,
            header_line_start,
            header_lws,
            header_name,
            space_before_header_value,
            header_value,
            expecting_newline_2,
            expecting_newline_3,


            // Example of the body layout the JSON states were written for:
            // {\n    \"name\":123,\n    \"李四\":\"李四\",\n    \"1\":null,\n    \"wx\":12.8\n}
            json_parse_start, // just after the opening '{'
            json_key_left_dot,  // expecting the opening quote of a key
            json_key_right_dot,  // just after the closing quote of a key
            json_key,
            json_value_start,
            json_value,
            json_value_left_dot,  // around the opening quote of a value
            json_value_right_dot,  // just after the closing quote of a value
            json_check_end, // value finished; decide whether the object ends
            json_comma,  // just after a comma
            json_colon, // around the colon between key and value
            json_save // store one key/value pair
        } state_;
    };
}

#endif //payhttp_REQUESTPARSER_HPP

.cpp文件

代码语言:cpp
复制
#include <tuple>
#include <boost/algorithm/string.hpp>
#include "server/include/RequestParser.hpp"

namespace payhttp {
// 默认构造状态:method_start
    RequestParser::RequestParser()
            : state_(method_start) {
    }

// 重置状态
    void RequestParser::reset() {
        state_ = method_start;
    }

// Finite state machine step: consumes exactly one character.
//
// Depending on the current state_, the character is validated and possibly
// appended to a field of req (method_, uri_, the HTTP version numbers, the
// last entry of headers_, or the pending JSON key_/value_), and state_ is
// advanced.
//
// Returns:
//   indeterminate - the character was accepted and more input is needed
//   good          - the header section ended (expecting_newline_3 saw '\n')
//                   or the JSON object ended (json_check_end saw '}')
//   bad           - the character is not valid in the current state
//
// The JSON states are never entered from within this function; state_ must be
// set to json_parse_start by the caller (parse() does this after a '{').
    RequestParser::resultType RequestParser::consume(Request& req, char input) {
        switch (state_) {
            // ---------------- request line: METHOD ----------------
            case method_start:
                // The first character must be a plain token character.
                if (!isChar(input) || isCtl(input) || isTspecial(input)) {
                    return bad;
                    // Otherwise move on to the method state.
                } else {
                    state_ = method;
                    // Append the character to the method name.
                    req.method_.push_back(input);
                    return indeterminate;
                }
            case method: // a ' ' ends the method and starts the URI
                if (input == ' ') {
                    state_ = uri;
                    return indeterminate;
                } else if (!isChar(input) || isCtl(input) || isTspecial(input)) {
                    return bad;
                } else {
                    req.method_.push_back(input);
                    return indeterminate;
                }
            // ---------------- request line: URI ----------------
            case uri: // a ' ' ends the URI and starts the HTTP version
                if (input == ' ') {
                    state_ = http_version_h;
                    return indeterminate;
                } else if (isCtl(input)) {
                    return bad;
                } else {
                    req.uri_.push_back(input);
                    return indeterminate;
                }
            // ---------------- request line: literal "HTTP/" ----------------
            case http_version_h:
                if (input == 'H') {
                    state_ = http_version_t_1;
                    return indeterminate;
                } else {
                    return bad;
                }
            case http_version_t_1:
                if (input == 'T') {
                    state_ = http_version_t_2;
                    return indeterminate;
                } else {
                    return bad;
                }
            case http_version_t_2:
                if (input == 'T') {
                    state_ = http_version_p;
                    return indeterminate;
                } else {
                    return bad;
                }
            case http_version_p:
                if (input == 'P') {
                    state_ = http_version_slash;
                    return indeterminate;
                } else {
                    return bad;
                }
            case http_version_slash:
                if (input == '/') {
                    // Both version numbers are accumulated digit by digit below.
                    req.httpVersionMajor_ = 0;
                    req.httpVersionMinor_ = 0;
                    state_ = http_version_major_start;
                    return indeterminate;
                } else {
                    return bad;
                }
            // ---------------- request line: MAJOR.MINOR ----------------
            case http_version_major_start:
                // At least one digit is required for the major version.
                if (isDigit(input)) {
                    req.httpVersionMajor_ = req.httpVersionMajor_ * 10 + input - '0';
                    state_ = http_version_major;
                    return indeterminate;
                } else {
                    return bad;
                }
            case http_version_major:
                if (input == '.') {
                    state_ = http_version_minor_start;
                    return indeterminate;
                } else if (isDigit(input)) {
                    req.httpVersionMajor_ = req.httpVersionMajor_ * 10 + input - '0';
                    return indeterminate;
                } else {
                    return bad;
                }
            case http_version_minor_start:
                // At least one digit is required for the minor version.
                if (isDigit(input)) {
                    req.httpVersionMinor_ = req.httpVersionMinor_ * 10 + input - '0';
                    state_ = http_version_minor;
                    return indeterminate;
                } else {
                    return bad;
                }
            case http_version_minor:
                // '\r' ends the request line.
                if (input == '\r') {
                    state_ = expecting_newline_1;
                    return indeterminate;
                } else if (isDigit(input)) {
                    req.httpVersionMinor_ = req.httpVersionMinor_ * 10 + input - '0';
                    return indeterminate;
                } else {
                    return bad;
                }
            case expecting_newline_1:
                if (input == '\n') {
                    state_ = header_line_start;
                    return indeterminate;
                } else {
                    return bad;
                }
            // ---------------- header lines ----------------
            case header_line_start:
                if (input == '\r') {
                    // Empty line: the header section is about to end.
                    state_ = expecting_newline_3;
                    return indeterminate;
                } else if (!req.headers_.empty() && (input == ' ' || input == '\t')) {
                    // Leading whitespace continues the value of the previous header.
                    state_ = header_lws;
                    return indeterminate;
                } else if (!isChar(input) || isCtl(input) || isTspecial(input)) {
                    return bad;
                } else {
                    // First character of a new header name.
                    req.headers_.emplace_back(Header());
                    req.headers_.back().name_.push_back(input);
                    state_ = header_name;
                    return indeterminate;
                }
            case header_lws:
                if (input == '\r') {
                    state_ = expecting_newline_2;
                    return indeterminate;
                } else if (input == ' ' || input == '\t') {
                    // Skip further leading whitespace.
                    return indeterminate;
                } else if (isCtl(input)) {
                    return bad;
                } else {
                    state_ = header_value;
                    req.headers_.back().value_.push_back(input);
                    return indeterminate;
                }
            case header_name:
                if (input == ':') {
                    state_ = space_before_header_value;
                    return indeterminate;
                } else if (!isChar(input) || isCtl(input) || isTspecial(input)) {
                    return bad;
                } else {
                    req.headers_.back().name_.push_back(input);
                    return indeterminate;
                }
            case space_before_header_value:
                // Exactly one ' ' is required after the ':'.
                if (input == ' ') {
                    state_ = header_value;
                    return indeterminate;
                } else {
                    return bad;
                }
            case header_value:
                if (input == '\r') {
                    state_ = expecting_newline_2;
                    return indeterminate;
                } else if (isCtl(input)) {
                    return bad;
                } else {
                    req.headers_.back().value_.push_back(input);
                    return indeterminate;
                }
            case expecting_newline_2:
                if (input == '\n') {
                    state_ = header_line_start;
                    return indeterminate;
                } else {
                    return bad;
                }
            case expecting_newline_3:
                // End of the header section. state_ is left unchanged here.
                return (input == '\n') ? good : bad;
            // ---------------- JSON body (flat object) ----------------
            case json_parse_start:
                // After '{': spaces are skipped, then a '\n' is required.
                if (input == ' ') {
                    return indeterminate;
                }
                if (input == '\n') {
                    state_ = json_key_left_dot;
                    return indeterminate;
                } else {
                    return bad;
                }
            case json_key_left_dot:
                if (input == ' ') {
                    return indeterminate;
                }
                // Before scanning the next key, store the pending key/value
                // pair (if any) into the body.
                if (!req.key_.empty()) {
                    req.body_.insert(make_pair(req.key_, req.value_));
                    req.key_.clear();
                    req.value_.clear();
                }
                // The key must start with a double quote.
                if (input == '"') {
                    state_ = json_key;
                    return indeterminate;
                } else {
                    return bad;
                }
            case json_key:
                // Note: spaces are dropped, i.e. they are not stored in the key.
                if (input == ' ') {
                    return indeterminate;
                }
                if (input == '"') {
                    state_ = json_key_right_dot;
                    return indeterminate;
                } else {
                    // Every other character becomes part of the key.
                    req.key_.push_back(input);
                    return indeterminate;
                }
            case json_key_right_dot:
                if (input == ' ') {
                    return indeterminate;
                }
                // Either '"' or ':' moves on to the colon state.
                if (input == '"' || input == ':') {
                    state_ = json_colon;
                    return indeterminate;
                } else {
                    return bad;
                }
            case json_colon:
                if (input == ' ') {
                    return indeterminate;
                }
                if (input == ':') {
                    state_ = json_value_start;
                    return indeterminate;

                } else if (input == '"') {
                    state_ = json_value_start;
                    return indeterminate;
                } else {
                    // Any other character is the first character of the value.
                    state_ = json_value;
                    req.value_.push_back(input);
                    return indeterminate;
                }
            case json_value_start:
                if (input == ' ') {
                    return indeterminate;
                }
                if (input == '"') {
                    state_ = json_value_left_dot;
                    return indeterminate;
                } else {
                    // Any other character is the first character of the value.
                    state_ = json_value;
                    req.value_.push_back(input);
                    return indeterminate;
                }
            case json_value_left_dot:
                if (input == ' ') {
                    return indeterminate;
                }
                // The state is switched before the check; only '"' is accepted.
                state_ = json_value;
                if (input == '"') {
                    return indeterminate;
                } else {
                    return bad;
                }
            case json_value:
                // Note: spaces are dropped, i.e. they are not stored in the value.
                if (input == ' ') {
                    return indeterminate;
                }
                if (input == ',') {
                    state_ = json_comma;
                    return indeterminate;
                } else if (input == '"') {
                    state_ = json_value_right_dot;
                    return indeterminate;
                } else if (input == '\n') { // a newline ends the value; go check for the end of the object
                    state_ = json_check_end;
                    return indeterminate;
                } else {
                    req.value_.push_back(input);
                    return indeterminate;
                }
            case json_value_right_dot:
                if (input == ' ') {
                    return indeterminate;
                }
                if (input == ',') {
                    state_ = json_comma;
                    return indeterminate;
                }
                if (input == '"') {
                    state_ = json_check_end;
                    return indeterminate;
                } else {
                    return bad;
                }
            case json_check_end:
                // Spaces and newlines are skipped while looking for '}' or ','.
                if (input == ' ') {
                    return indeterminate;
                }
                if (input == '\n') {
                    return indeterminate;
                } else if (input == '}') {
                    // End of the object: store the last pair and report success.
                    req.body_.insert(make_pair(req.key_, req.value_));
                    req.key_.clear();
                    req.value_.clear();
                    return good;
                } else if (input == ',') {
                    state_ = json_comma;
                    return indeterminate;
                } else {
                    return bad;
                }
            case json_comma:
                if (input == ' ') {
                    return indeterminate;
                }
                // After a comma only '\n' or '\\' leads to the next key.
                if (input == '\n') {
                    state_ = json_key_left_dot;
                    return indeterminate;
                } else if (input == '\\') {
                    state_ = json_key_left_dot;
                    return indeterminate;
                } else {
                    return bad;
                }
            default:
                // States without a case above (e.g. json_save) are rejected.
                return bad;
        }
    }

    /**
     * Post-processes a parsed request.
     *
     * Splits req.uri_ at the first '?': the part before it is stored in
     * req.shortUri_, and the part after it is decoded as `name=value` pairs
     * separated by '&' into req.uriParams_. Each pair is split at its FIRST
     * '=' only, so values that themselves contain '=' are kept intact. Pairs
     * without any '=' are ignored. No percent-decoding is performed.
     *
     * @param req   request whose uri_/headers_ are read and whose
     *              shortUri_/uriParams_ are written
     * @param data_ currently unused by this function
     */
    void RequestParser::parseParam(Request& req, std::string& data_) {
        // ---- URI query parameters ----
        const std::string::size_type query_pos = req.uri_.find('?');
        if (query_pos != std::string::npos) {
            // Path without the query string.
            req.shortUri_ = req.uri_.substr(0, query_pos);

            // Everything after the '?'.
            std::string param_str = req.uri_.substr(query_pos + 1);

            std::vector<std::string> pairs;
            boost::split(pairs, param_str, boost::is_any_of("&"));

            for (const auto& pair : pairs) {
                const std::string::size_type eq_pos = pair.find('=');
                if (eq_pos == std::string::npos) {
                    continue;  // no '=' at all: not a name/value pair
                }
                req.uriParams_.insert(std::make_pair(pair.substr(0, eq_pos),
                                                     pair.substr(eq_pos + 1)));
            }
        } else {
            req.shortUri_ = req.uri_;
        }

        // ---- body / form handling ----
        // Look up the Content-Type header (case-insensitive name match).
        std::string content_type;
        for (const auto& header : req.headers_) {
            if (boost::algorithm::iequals(header.name_, "content-type")) {
                content_type = header.value_;
                break;
            }
        }

        // Strip parameters such as "; charset=..." from the media type.
        // NOTE(review): content_type is computed but not used any further here.
        const std::string::size_type semicolon_pos = content_type.find(';');
        if (semicolon_pos != std::string::npos && semicolon_pos > 0) {
            content_type = content_type.substr(0, semicolon_pos);
        }
    }


    // True when c lies within the 7-bit range [0, 127].
    bool RequestParser::isChar(int c) {
        if (c < 0 || c > 127) {
            return false;
        }
        return true;
    }

    // True for control codes: 127, or any value in [0, 31].
    bool RequestParser::isCtl(int c) {
        if (c == 127) {
            return true;
        }
        return 0 <= c && c < 32;
    }

    // True when c is one of the separator characters
    // ( ) < > @ , ; : \ " / [ ] ? = { } space or horizontal tab.
    bool RequestParser::isTspecial(int c) {
        static const char kSeparators[] = "()<>@,;:\\\"/[]?={} \t";
        // The scan stops at the terminating NUL, so c == 0 never matches.
        for (const char* sep = kSeparators; *sep != '\0'; ++sep) {
            if (c == *sep) {
                return true;
            }
        }
        return false;
    }

    // True when c is one of the characters '0' through '9'.
    bool RequestParser::isDigit(int c) {
        const bool below_zero = c < '0';
        const bool above_nine = c > '9';
        return !(below_zero || above_nine);
    }
}
本文参与 腾讯云自媒体同步曝光计划,分享自作者个人站点/博客。
原始发表:2021/05/09 ,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 作者个人站点/博客 前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体同步曝光计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档