1af4edf68SEd Tanous #pragma once 2af4edf68SEd Tanous 33ccb3adbSEd Tanous #include "http_request.hpp" 43ccb3adbSEd Tanous 5af4edf68SEd Tanous #include <boost/beast/http/fields.hpp> 6af4edf68SEd Tanous 73544d2a7SEd Tanous #include <ranges> 8af4edf68SEd Tanous #include <string> 9af4edf68SEd Tanous #include <string_view> 10af4edf68SEd Tanous 11af4edf68SEd Tanous enum class ParserError 12af4edf68SEd Tanous { 13af4edf68SEd Tanous PARSER_SUCCESS, 14af4edf68SEd Tanous ERROR_BOUNDARY_FORMAT, 15af4edf68SEd Tanous ERROR_BOUNDARY_CR, 16af4edf68SEd Tanous ERROR_BOUNDARY_LF, 17af4edf68SEd Tanous ERROR_BOUNDARY_DATA, 18af4edf68SEd Tanous ERROR_EMPTY_HEADER, 19af4edf68SEd Tanous ERROR_HEADER_NAME, 20af4edf68SEd Tanous ERROR_HEADER_VALUE, 2118e3f7fbSKrzysztof Grobelny ERROR_HEADER_ENDING, 2218e3f7fbSKrzysztof Grobelny ERROR_UNEXPECTED_END_OF_HEADER, 2318e3f7fbSKrzysztof Grobelny ERROR_UNEXPECTED_END_OF_INPUT, 2418e3f7fbSKrzysztof Grobelny ERROR_OUT_OF_RANGE 25af4edf68SEd Tanous }; 26af4edf68SEd Tanous 27af4edf68SEd Tanous enum class State 28af4edf68SEd Tanous { 29af4edf68SEd Tanous START, 30af4edf68SEd Tanous START_BOUNDARY, 31af4edf68SEd Tanous HEADER_FIELD_START, 32af4edf68SEd Tanous HEADER_FIELD, 33af4edf68SEd Tanous HEADER_VALUE_START, 34af4edf68SEd Tanous HEADER_VALUE, 35af4edf68SEd Tanous HEADER_VALUE_ALMOST_DONE, 36af4edf68SEd Tanous HEADERS_ALMOST_DONE, 37af4edf68SEd Tanous PART_DATA_START, 38af4edf68SEd Tanous PART_DATA, 39af4edf68SEd Tanous END 40af4edf68SEd Tanous }; 41af4edf68SEd Tanous 42af4edf68SEd Tanous enum class Boundary 43af4edf68SEd Tanous { 44af4edf68SEd Tanous NON_BOUNDARY, 45af4edf68SEd Tanous PART_BOUNDARY, 46af4edf68SEd Tanous END_BOUNDARY, 47af4edf68SEd Tanous }; 48af4edf68SEd Tanous 49af4edf68SEd Tanous struct FormPart 50af4edf68SEd Tanous { 51af4edf68SEd Tanous boost::beast::http::fields fields; 52af4edf68SEd Tanous std::string content; 53af4edf68SEd Tanous }; 54af4edf68SEd Tanous 55af4edf68SEd Tanous class MultipartParser 56af4edf68SEd Tanous { 57af4edf68SEd Tanous public: 58af4edf68SEd Tanous MultipartParser() = default; 59af4edf68SEd Tanous parse(const crow::Request & req)60af4edf68SEd Tanous [[nodiscard]] ParserError parse(const crow::Request& req) 61af4edf68SEd Tanous { 62af4edf68SEd Tanous std::string_view contentType = req.getHeaderValue("content-type"); 63af4edf68SEd Tanous 64af4edf68SEd Tanous const std::string boundaryFormat = "multipart/form-data; boundary="; 6511ba3979SEd Tanous if (!contentType.starts_with(boundaryFormat)) 66af4edf68SEd Tanous { 67af4edf68SEd Tanous return ParserError::ERROR_BOUNDARY_FORMAT; 68af4edf68SEd Tanous } 69af4edf68SEd Tanous 70af4edf68SEd Tanous std::string_view ctBoundary = contentType.substr(boundaryFormat.size()); 71af4edf68SEd Tanous 72af4edf68SEd Tanous boundary = "\r\n--"; 73af4edf68SEd Tanous boundary += ctBoundary; 74af4edf68SEd Tanous indexBoundary(); 75af4edf68SEd Tanous lookbehind.resize(boundary.size() + 8); 76af4edf68SEd Tanous state = State::START; 77af4edf68SEd Tanous 780e31e952SPatrick Williams const std::string& buffer = req.body(); 790e31e952SPatrick Williams size_t len = buffer.size(); 80af4edf68SEd Tanous char cl = 0; 81af4edf68SEd Tanous 82af4edf68SEd Tanous for (size_t i = 0; i < len; i++) 83af4edf68SEd Tanous { 84af4edf68SEd Tanous char c = buffer[i]; 85af4edf68SEd Tanous switch (state) 86af4edf68SEd Tanous { 87af4edf68SEd Tanous case State::START: 88af4edf68SEd Tanous index = 0; 89af4edf68SEd Tanous state = State::START_BOUNDARY; 90af4edf68SEd Tanous [[fallthrough]]; 91af4edf68SEd Tanous case State::START_BOUNDARY: 92af4edf68SEd Tanous if (index == boundary.size() - 2) 93af4edf68SEd Tanous { 94af4edf68SEd Tanous if (c != cr) 95af4edf68SEd Tanous { 96af4edf68SEd Tanous return ParserError::ERROR_BOUNDARY_CR; 97af4edf68SEd Tanous } 98af4edf68SEd Tanous index++; 99af4edf68SEd Tanous break; 100af4edf68SEd Tanous } 101af4edf68SEd Tanous else if (index - 1 == boundary.size() - 2) 102af4edf68SEd Tanous { 103af4edf68SEd Tanous if (c != lf) 104af4edf68SEd Tanous { 105af4edf68SEd Tanous return ParserError::ERROR_BOUNDARY_LF; 106af4edf68SEd Tanous } 107af4edf68SEd Tanous index = 0; 10826eee3a1SPatrick Williams mime_fields.emplace_back(); 109af4edf68SEd Tanous state = State::HEADER_FIELD_START; 110af4edf68SEd Tanous break; 111af4edf68SEd Tanous } 112af4edf68SEd Tanous if (c != boundary[index + 2]) 113af4edf68SEd Tanous { 114af4edf68SEd Tanous return ParserError::ERROR_BOUNDARY_DATA; 115af4edf68SEd Tanous } 116af4edf68SEd Tanous index++; 117af4edf68SEd Tanous break; 118af4edf68SEd Tanous case State::HEADER_FIELD_START: 119af4edf68SEd Tanous currentHeaderName.resize(0); 120af4edf68SEd Tanous state = State::HEADER_FIELD; 121af4edf68SEd Tanous headerFieldMark = i; 122af4edf68SEd Tanous index = 0; 123af4edf68SEd Tanous [[fallthrough]]; 124af4edf68SEd Tanous case State::HEADER_FIELD: 125af4edf68SEd Tanous if (c == cr) 126af4edf68SEd Tanous { 127af4edf68SEd Tanous headerFieldMark = 0; 128af4edf68SEd Tanous state = State::HEADERS_ALMOST_DONE; 129af4edf68SEd Tanous break; 130af4edf68SEd Tanous } 131af4edf68SEd Tanous 132af4edf68SEd Tanous index++; 133af4edf68SEd Tanous if (c == hyphen) 134af4edf68SEd Tanous { 135af4edf68SEd Tanous break; 136af4edf68SEd Tanous } 137af4edf68SEd Tanous 138af4edf68SEd Tanous if (c == colon) 139af4edf68SEd Tanous { 140af4edf68SEd Tanous if (index == 1) 141af4edf68SEd Tanous { 142af4edf68SEd Tanous return ParserError::ERROR_EMPTY_HEADER; 143af4edf68SEd Tanous } 144ca45aa3cSEd Tanous 1450e31e952SPatrick Williams currentHeaderName.append(&buffer[headerFieldMark], 146af4edf68SEd Tanous i - headerFieldMark); 147af4edf68SEd Tanous state = State::HEADER_VALUE_START; 148af4edf68SEd Tanous break; 149af4edf68SEd Tanous } 150af4edf68SEd Tanous cl = lower(c); 151af4edf68SEd Tanous if (cl < 'a' || cl > 'z') 152af4edf68SEd Tanous { 153af4edf68SEd Tanous return ParserError::ERROR_HEADER_NAME; 154af4edf68SEd Tanous } 155af4edf68SEd Tanous break; 156af4edf68SEd Tanous case State::HEADER_VALUE_START: 157af4edf68SEd Tanous if (c == space) 158af4edf68SEd Tanous { 159af4edf68SEd Tanous break; 160af4edf68SEd Tanous } 161af4edf68SEd Tanous headerValueMark = i; 162af4edf68SEd Tanous state = State::HEADER_VALUE; 163af4edf68SEd Tanous [[fallthrough]]; 164af4edf68SEd Tanous case State::HEADER_VALUE: 165af4edf68SEd Tanous if (c == cr) 166af4edf68SEd Tanous { 1670e31e952SPatrick Williams std::string_view value(&buffer[headerValueMark], 168af4edf68SEd Tanous i - headerValueMark); 169af4edf68SEd Tanous mime_fields.rbegin()->fields.set(currentHeaderName, 170af4edf68SEd Tanous value); 171af4edf68SEd Tanous state = State::HEADER_VALUE_ALMOST_DONE; 172af4edf68SEd Tanous } 173af4edf68SEd Tanous break; 174af4edf68SEd Tanous case State::HEADER_VALUE_ALMOST_DONE: 175af4edf68SEd Tanous if (c != lf) 176af4edf68SEd Tanous { 177af4edf68SEd Tanous return ParserError::ERROR_HEADER_VALUE; 178af4edf68SEd Tanous } 179af4edf68SEd Tanous state = State::HEADER_FIELD_START; 180af4edf68SEd Tanous break; 181af4edf68SEd Tanous case State::HEADERS_ALMOST_DONE: 182af4edf68SEd Tanous if (c != lf) 183af4edf68SEd Tanous { 184af4edf68SEd Tanous return ParserError::ERROR_HEADER_ENDING; 185af4edf68SEd Tanous } 18618e3f7fbSKrzysztof Grobelny if (index > 0) 18718e3f7fbSKrzysztof Grobelny { 18818e3f7fbSKrzysztof Grobelny return ParserError::ERROR_UNEXPECTED_END_OF_HEADER; 18918e3f7fbSKrzysztof Grobelny } 190af4edf68SEd Tanous state = State::PART_DATA_START; 191af4edf68SEd Tanous break; 192af4edf68SEd Tanous case State::PART_DATA_START: 193af4edf68SEd Tanous state = State::PART_DATA; 194af4edf68SEd Tanous partDataMark = i; 195af4edf68SEd Tanous [[fallthrough]]; 196af4edf68SEd Tanous case State::PART_DATA: 19718e3f7fbSKrzysztof Grobelny { 198af4edf68SEd Tanous if (index == 0) 199af4edf68SEd Tanous { 2000e31e952SPatrick Williams skipNonBoundary(buffer, boundary.size() - 1, i); 201af4edf68SEd Tanous c = buffer[i]; 202af4edf68SEd Tanous } 2030e31e952SPatrick Williams if (auto ec = processPartData(buffer, i, c); 2040e31e952SPatrick Williams ec != ParserError::PARSER_SUCCESS) 20518e3f7fbSKrzysztof Grobelny { 20618e3f7fbSKrzysztof Grobelny return ec; 20718e3f7fbSKrzysztof Grobelny } 208af4edf68SEd Tanous break; 20918e3f7fbSKrzysztof Grobelny } 210af4edf68SEd Tanous case State::END: 211af4edf68SEd Tanous break; 2124da0490bSEd Tanous default: 2134da0490bSEd Tanous return ParserError::ERROR_UNEXPECTED_END_OF_INPUT; 214af4edf68SEd Tanous } 215af4edf68SEd Tanous } 21618e3f7fbSKrzysztof Grobelny 21718e3f7fbSKrzysztof Grobelny if (state != State::END) 21818e3f7fbSKrzysztof Grobelny { 21918e3f7fbSKrzysztof Grobelny return ParserError::ERROR_UNEXPECTED_END_OF_INPUT; 22018e3f7fbSKrzysztof Grobelny } 22118e3f7fbSKrzysztof Grobelny 222af4edf68SEd Tanous return ParserError::PARSER_SUCCESS; 223af4edf68SEd Tanous } 224af4edf68SEd Tanous std::vector<FormPart> mime_fields; 225af4edf68SEd Tanous std::string boundary; 226af4edf68SEd Tanous 227af4edf68SEd Tanous private: indexBoundary()228af4edf68SEd Tanous void indexBoundary() 229af4edf68SEd Tanous { 2303544d2a7SEd Tanous std::ranges::fill(boundaryIndex, 0); 231af4edf68SEd Tanous for (const char current : boundary) 232af4edf68SEd Tanous { 233af4edf68SEd Tanous boundaryIndex[static_cast<unsigned char>(current)] = true; 234af4edf68SEd Tanous } 235af4edf68SEd Tanous } 236af4edf68SEd Tanous lower(char c)23756d2396dSEd Tanous static char lower(char c) 238af4edf68SEd Tanous { 239af4edf68SEd Tanous return static_cast<char>(c | 0x20); 240af4edf68SEd Tanous } 241af4edf68SEd Tanous isBoundaryChar(char c) const242*9de65b34SEd Tanous bool isBoundaryChar(char c) const 243af4edf68SEd Tanous { 244af4edf68SEd Tanous return boundaryIndex[static_cast<unsigned char>(c)]; 245af4edf68SEd Tanous } 246af4edf68SEd Tanous skipNonBoundary(const std::string & buffer,size_t boundaryEnd,size_t & i)2470e31e952SPatrick Williams void skipNonBoundary(const std::string& buffer, size_t boundaryEnd, 248af4edf68SEd Tanous size_t& i) 249af4edf68SEd Tanous { 250af4edf68SEd Tanous // boyer-moore derived algorithm to safely skip non-boundary data 2510e31e952SPatrick Williams while (i + boundary.size() <= buffer.length()) 252af4edf68SEd Tanous { 253af4edf68SEd Tanous if (isBoundaryChar(buffer[i + boundaryEnd])) 254af4edf68SEd Tanous { 255af4edf68SEd Tanous break; 256af4edf68SEd Tanous } 257af4edf68SEd Tanous i += boundary.size(); 258af4edf68SEd Tanous } 259af4edf68SEd Tanous } 260af4edf68SEd Tanous processPartData(const std::string & buffer,size_t & i,char c)2610e31e952SPatrick Williams ParserError processPartData(const std::string& buffer, size_t& i, char c) 262af4edf68SEd Tanous { 26318e3f7fbSKrzysztof Grobelny size_t prevIndex = index; 264af4edf68SEd Tanous 265af4edf68SEd Tanous if (index < boundary.size()) 266af4edf68SEd Tanous { 267af4edf68SEd Tanous if (boundary[index] == c) 268af4edf68SEd Tanous { 269af4edf68SEd Tanous if (index == 0) 270af4edf68SEd Tanous { 2710e31e952SPatrick Williams const char* start = &buffer[partDataMark]; 272ca45aa3cSEd Tanous size_t size = i - partDataMark; 27389492a15SPatrick Williams mime_fields.rbegin()->content += std::string_view(start, 27489492a15SPatrick Williams size); 275af4edf68SEd Tanous } 276af4edf68SEd Tanous index++; 277af4edf68SEd Tanous } 278af4edf68SEd Tanous else 279af4edf68SEd Tanous { 280af4edf68SEd Tanous index = 0; 281af4edf68SEd Tanous } 282af4edf68SEd Tanous } 283af4edf68SEd Tanous else if (index == boundary.size()) 284af4edf68SEd Tanous { 285af4edf68SEd Tanous index++; 286af4edf68SEd Tanous if (c == cr) 287af4edf68SEd Tanous { 288af4edf68SEd Tanous // cr = part boundary 289af4edf68SEd Tanous flags = Boundary::PART_BOUNDARY; 290af4edf68SEd Tanous } 291af4edf68SEd Tanous else if (c == hyphen) 292af4edf68SEd Tanous { 293af4edf68SEd Tanous // hyphen = end boundary 294af4edf68SEd Tanous flags = Boundary::END_BOUNDARY; 295af4edf68SEd Tanous } 296af4edf68SEd Tanous else 297af4edf68SEd Tanous { 298af4edf68SEd Tanous index = 0; 299af4edf68SEd Tanous } 300af4edf68SEd Tanous } 301af4edf68SEd Tanous else 302af4edf68SEd Tanous { 303af4edf68SEd Tanous if (flags == Boundary::PART_BOUNDARY) 304af4edf68SEd Tanous { 305af4edf68SEd Tanous index = 0; 306af4edf68SEd Tanous if (c == lf) 307af4edf68SEd Tanous { 308af4edf68SEd Tanous // unset the PART_BOUNDARY flag 309af4edf68SEd Tanous flags = Boundary::NON_BOUNDARY; 31026eee3a1SPatrick Williams mime_fields.emplace_back(); 311af4edf68SEd Tanous state = State::HEADER_FIELD_START; 31218e3f7fbSKrzysztof Grobelny return ParserError::PARSER_SUCCESS; 313af4edf68SEd Tanous } 314af4edf68SEd Tanous } 315af4edf68SEd Tanous if (flags == Boundary::END_BOUNDARY) 316af4edf68SEd Tanous { 317af4edf68SEd Tanous if (c == hyphen) 318af4edf68SEd Tanous { 319af4edf68SEd Tanous state = State::END; 320af4edf68SEd Tanous } 32118e3f7fbSKrzysztof Grobelny else 32218e3f7fbSKrzysztof Grobelny { 32318e3f7fbSKrzysztof Grobelny flags = Boundary::NON_BOUNDARY; 32418e3f7fbSKrzysztof Grobelny index = 0; 32518e3f7fbSKrzysztof Grobelny } 326af4edf68SEd Tanous } 327af4edf68SEd Tanous } 328af4edf68SEd Tanous 329af4edf68SEd Tanous if (index > 0) 330af4edf68SEd Tanous { 33118e3f7fbSKrzysztof Grobelny if ((index - 1) >= lookbehind.size()) 33218e3f7fbSKrzysztof Grobelny { 33318e3f7fbSKrzysztof Grobelny // Should never happen, but when it does it won't cause crash 33418e3f7fbSKrzysztof Grobelny return ParserError::ERROR_OUT_OF_RANGE; 33518e3f7fbSKrzysztof Grobelny } 336af4edf68SEd Tanous lookbehind[index - 1] = c; 337af4edf68SEd Tanous } 338af4edf68SEd Tanous else if (prevIndex > 0) 339af4edf68SEd Tanous { 340af4edf68SEd Tanous // if our boundary turned out to be rubbish, the captured 341af4edf68SEd Tanous // lookbehind belongs to partData 342af4edf68SEd Tanous 343af4edf68SEd Tanous mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex); 344af4edf68SEd Tanous partDataMark = i; 345af4edf68SEd Tanous 346af4edf68SEd Tanous // reconsider the current character even so it interrupted 347af4edf68SEd Tanous // the sequence it could be the beginning of a new sequence 348af4edf68SEd Tanous i--; 349af4edf68SEd Tanous } 35018e3f7fbSKrzysztof Grobelny return ParserError::PARSER_SUCCESS; 351af4edf68SEd Tanous } 352af4edf68SEd Tanous 353af4edf68SEd Tanous std::string currentHeaderName; 354af4edf68SEd Tanous std::string currentHeaderValue; 355af4edf68SEd Tanous 356af4edf68SEd Tanous static constexpr char cr = '\r'; 357af4edf68SEd Tanous static constexpr char lf = '\n'; 358af4edf68SEd Tanous static constexpr char space = ' '; 359af4edf68SEd Tanous static constexpr char hyphen = '-'; 360af4edf68SEd Tanous static constexpr char colon = ':'; 361af4edf68SEd Tanous 362d3a9e084SEd Tanous std::array<bool, 256> boundaryIndex{}; 363af4edf68SEd Tanous std::string lookbehind; 364d3a9e084SEd Tanous State state{State::START}; 365d3a9e084SEd Tanous Boundary flags{Boundary::NON_BOUNDARY}; 366af4edf68SEd Tanous size_t index = 0; 367af4edf68SEd Tanous size_t partDataMark = 0; 368af4edf68SEd Tanous size_t headerFieldMark = 0; 369af4edf68SEd Tanous size_t headerValueMark = 0; 370af4edf68SEd Tanous }; 371