1af4edf68SEd Tanous #pragma once 2af4edf68SEd Tanous 3af4edf68SEd Tanous #include <boost/beast/http/fields.hpp> 4af4edf68SEd Tanous #include <http_request.hpp> 5af4edf68SEd Tanous 6af4edf68SEd Tanous #include <string> 7af4edf68SEd Tanous #include <string_view> 8af4edf68SEd Tanous 9af4edf68SEd Tanous enum class ParserError 10af4edf68SEd Tanous { 11af4edf68SEd Tanous PARSER_SUCCESS, 12af4edf68SEd Tanous ERROR_BOUNDARY_FORMAT, 13af4edf68SEd Tanous ERROR_BOUNDARY_CR, 14af4edf68SEd Tanous ERROR_BOUNDARY_LF, 15af4edf68SEd Tanous ERROR_BOUNDARY_DATA, 16af4edf68SEd Tanous ERROR_EMPTY_HEADER, 17af4edf68SEd Tanous ERROR_HEADER_NAME, 18af4edf68SEd Tanous ERROR_HEADER_VALUE, 19af4edf68SEd Tanous ERROR_HEADER_ENDING 20af4edf68SEd Tanous }; 21af4edf68SEd Tanous 22af4edf68SEd Tanous enum class State 23af4edf68SEd Tanous { 24af4edf68SEd Tanous START, 25af4edf68SEd Tanous START_BOUNDARY, 26af4edf68SEd Tanous HEADER_FIELD_START, 27af4edf68SEd Tanous HEADER_FIELD, 28af4edf68SEd Tanous HEADER_VALUE_START, 29af4edf68SEd Tanous HEADER_VALUE, 30af4edf68SEd Tanous HEADER_VALUE_ALMOST_DONE, 31af4edf68SEd Tanous HEADERS_ALMOST_DONE, 32af4edf68SEd Tanous PART_DATA_START, 33af4edf68SEd Tanous PART_DATA, 34af4edf68SEd Tanous END 35af4edf68SEd Tanous }; 36af4edf68SEd Tanous 37af4edf68SEd Tanous enum class Boundary 38af4edf68SEd Tanous { 39af4edf68SEd Tanous NON_BOUNDARY, 40af4edf68SEd Tanous PART_BOUNDARY, 41af4edf68SEd Tanous END_BOUNDARY, 42af4edf68SEd Tanous }; 43af4edf68SEd Tanous 44af4edf68SEd Tanous struct FormPart 45af4edf68SEd Tanous { 46af4edf68SEd Tanous boost::beast::http::fields fields; 47af4edf68SEd Tanous std::string content; 48af4edf68SEd Tanous }; 49af4edf68SEd Tanous 50af4edf68SEd Tanous class MultipartParser 51af4edf68SEd Tanous { 52af4edf68SEd Tanous public: 53af4edf68SEd Tanous MultipartParser() = default; 54af4edf68SEd Tanous 55af4edf68SEd Tanous [[nodiscard]] ParserError parse(const crow::Request& req) 56af4edf68SEd Tanous { 57af4edf68SEd Tanous std::string_view contentType = req.getHeaderValue("content-type"); 58af4edf68SEd Tanous 59af4edf68SEd Tanous const std::string boundaryFormat = "multipart/form-data; boundary="; 60*11ba3979SEd Tanous if (!contentType.starts_with(boundaryFormat)) 61af4edf68SEd Tanous { 62af4edf68SEd Tanous return ParserError::ERROR_BOUNDARY_FORMAT; 63af4edf68SEd Tanous } 64af4edf68SEd Tanous 65af4edf68SEd Tanous std::string_view ctBoundary = contentType.substr(boundaryFormat.size()); 66af4edf68SEd Tanous 67af4edf68SEd Tanous boundary = "\r\n--"; 68af4edf68SEd Tanous boundary += ctBoundary; 69af4edf68SEd Tanous indexBoundary(); 70af4edf68SEd Tanous lookbehind.resize(boundary.size() + 8); 71af4edf68SEd Tanous state = State::START; 72af4edf68SEd Tanous 73af4edf68SEd Tanous const char* buffer = req.body.data(); 74af4edf68SEd Tanous size_t len = req.body.size(); 75af4edf68SEd Tanous size_t prevIndex = index; 76af4edf68SEd Tanous char cl = 0; 77af4edf68SEd Tanous 78af4edf68SEd Tanous for (size_t i = 0; i < len; i++) 79af4edf68SEd Tanous { 80ca45aa3cSEd Tanous // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 81af4edf68SEd Tanous char c = buffer[i]; 82af4edf68SEd Tanous switch (state) 83af4edf68SEd Tanous { 84af4edf68SEd Tanous case State::START: 85af4edf68SEd Tanous index = 0; 86af4edf68SEd Tanous state = State::START_BOUNDARY; 87af4edf68SEd Tanous [[fallthrough]]; 88af4edf68SEd Tanous case State::START_BOUNDARY: 89af4edf68SEd Tanous if (index == boundary.size() - 2) 90af4edf68SEd Tanous { 91af4edf68SEd Tanous if (c != cr) 92af4edf68SEd Tanous { 93af4edf68SEd Tanous return ParserError::ERROR_BOUNDARY_CR; 94af4edf68SEd Tanous } 95af4edf68SEd Tanous index++; 96af4edf68SEd Tanous break; 97af4edf68SEd Tanous } 98af4edf68SEd Tanous else if (index - 1 == boundary.size() - 2) 99af4edf68SEd Tanous { 100af4edf68SEd Tanous if (c != lf) 101af4edf68SEd Tanous { 102af4edf68SEd Tanous return ParserError::ERROR_BOUNDARY_LF; 103af4edf68SEd Tanous } 104af4edf68SEd Tanous index = 0; 105af4edf68SEd Tanous mime_fields.push_back({}); 106af4edf68SEd Tanous state = State::HEADER_FIELD_START; 107af4edf68SEd Tanous break; 108af4edf68SEd Tanous } 109af4edf68SEd Tanous if (c != boundary[index + 2]) 110af4edf68SEd Tanous { 111af4edf68SEd Tanous return ParserError::ERROR_BOUNDARY_DATA; 112af4edf68SEd Tanous } 113af4edf68SEd Tanous index++; 114af4edf68SEd Tanous break; 115af4edf68SEd Tanous case State::HEADER_FIELD_START: 116af4edf68SEd Tanous currentHeaderName.resize(0); 117af4edf68SEd Tanous state = State::HEADER_FIELD; 118af4edf68SEd Tanous headerFieldMark = i; 119af4edf68SEd Tanous index = 0; 120af4edf68SEd Tanous [[fallthrough]]; 121af4edf68SEd Tanous case State::HEADER_FIELD: 122af4edf68SEd Tanous if (c == cr) 123af4edf68SEd Tanous { 124af4edf68SEd Tanous headerFieldMark = 0; 125af4edf68SEd Tanous state = State::HEADERS_ALMOST_DONE; 126af4edf68SEd Tanous break; 127af4edf68SEd Tanous } 128af4edf68SEd Tanous 129af4edf68SEd Tanous index++; 130af4edf68SEd Tanous if (c == hyphen) 131af4edf68SEd Tanous { 132af4edf68SEd Tanous break; 133af4edf68SEd Tanous } 134af4edf68SEd Tanous 135af4edf68SEd Tanous if (c == colon) 136af4edf68SEd Tanous { 137af4edf68SEd Tanous if (index == 1) 138af4edf68SEd Tanous { 139af4edf68SEd Tanous return ParserError::ERROR_EMPTY_HEADER; 140af4edf68SEd Tanous } 141ca45aa3cSEd Tanous 142ca45aa3cSEd Tanous // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 143af4edf68SEd Tanous currentHeaderName.append(buffer + headerFieldMark, 144af4edf68SEd Tanous i - headerFieldMark); 145af4edf68SEd Tanous state = State::HEADER_VALUE_START; 146af4edf68SEd Tanous break; 147af4edf68SEd Tanous } 148af4edf68SEd Tanous cl = lower(c); 149af4edf68SEd Tanous if (cl < 'a' || cl > 'z') 150af4edf68SEd Tanous { 151af4edf68SEd Tanous return ParserError::ERROR_HEADER_NAME; 152af4edf68SEd Tanous } 153af4edf68SEd Tanous break; 154af4edf68SEd Tanous case State::HEADER_VALUE_START: 155af4edf68SEd Tanous if (c == space) 156af4edf68SEd Tanous { 157af4edf68SEd Tanous break; 158af4edf68SEd Tanous } 159af4edf68SEd Tanous headerValueMark = i; 160af4edf68SEd Tanous state = State::HEADER_VALUE; 161af4edf68SEd Tanous [[fallthrough]]; 162af4edf68SEd Tanous case State::HEADER_VALUE: 163af4edf68SEd Tanous if (c == cr) 164af4edf68SEd Tanous { 165ca45aa3cSEd Tanous // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 166af4edf68SEd Tanous std::string_view value(buffer + headerValueMark, 167af4edf68SEd Tanous i - headerValueMark); 168af4edf68SEd Tanous mime_fields.rbegin()->fields.set(currentHeaderName, 169af4edf68SEd Tanous value); 170af4edf68SEd Tanous state = State::HEADER_VALUE_ALMOST_DONE; 171af4edf68SEd Tanous } 172af4edf68SEd Tanous break; 173af4edf68SEd Tanous case State::HEADER_VALUE_ALMOST_DONE: 174af4edf68SEd Tanous if (c != lf) 175af4edf68SEd Tanous { 176af4edf68SEd Tanous return ParserError::ERROR_HEADER_VALUE; 177af4edf68SEd Tanous } 178af4edf68SEd Tanous state = State::HEADER_FIELD_START; 179af4edf68SEd Tanous break; 180af4edf68SEd Tanous case State::HEADERS_ALMOST_DONE: 181af4edf68SEd Tanous if (c != lf) 182af4edf68SEd Tanous { 183af4edf68SEd Tanous return ParserError::ERROR_HEADER_ENDING; 184af4edf68SEd Tanous } 185af4edf68SEd Tanous state = State::PART_DATA_START; 186af4edf68SEd Tanous break; 187af4edf68SEd Tanous case State::PART_DATA_START: 188af4edf68SEd Tanous state = State::PART_DATA; 189af4edf68SEd Tanous partDataMark = i; 190af4edf68SEd Tanous [[fallthrough]]; 191af4edf68SEd Tanous case State::PART_DATA: 192af4edf68SEd Tanous if (index == 0) 193af4edf68SEd Tanous { 194af4edf68SEd Tanous skipNonBoundary(buffer, len, boundary.size() - 1, i); 195ca45aa3cSEd Tanous 196ca45aa3cSEd Tanous // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 197af4edf68SEd Tanous c = buffer[i]; 198af4edf68SEd Tanous } 1998a592810SEd Tanous processPartData(prevIndex, buffer, i, c); 200af4edf68SEd Tanous break; 201af4edf68SEd Tanous case State::END: 202af4edf68SEd Tanous break; 203af4edf68SEd Tanous } 204af4edf68SEd Tanous } 205af4edf68SEd Tanous return ParserError::PARSER_SUCCESS; 206af4edf68SEd Tanous } 207af4edf68SEd Tanous std::vector<FormPart> mime_fields; 208af4edf68SEd Tanous std::string boundary; 209af4edf68SEd Tanous 210af4edf68SEd Tanous private: 211af4edf68SEd Tanous void indexBoundary() 212af4edf68SEd Tanous { 213af4edf68SEd Tanous std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0); 214af4edf68SEd Tanous for (const char current : boundary) 215af4edf68SEd Tanous { 216af4edf68SEd Tanous boundaryIndex[static_cast<unsigned char>(current)] = true; 217af4edf68SEd Tanous } 218af4edf68SEd Tanous } 219af4edf68SEd Tanous 22056d2396dSEd Tanous static char lower(char c) 221af4edf68SEd Tanous { 222af4edf68SEd Tanous return static_cast<char>(c | 0x20); 223af4edf68SEd Tanous } 224af4edf68SEd Tanous 225af4edf68SEd Tanous inline bool isBoundaryChar(char c) const 226af4edf68SEd Tanous { 227af4edf68SEd Tanous return boundaryIndex[static_cast<unsigned char>(c)]; 228af4edf68SEd Tanous } 229af4edf68SEd Tanous 230af4edf68SEd Tanous void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd, 231af4edf68SEd Tanous size_t& i) 232af4edf68SEd Tanous { 233af4edf68SEd Tanous // boyer-moore derived algorithm to safely skip non-boundary data 234af4edf68SEd Tanous while (i + boundary.size() <= len) 235af4edf68SEd Tanous { 236ca45aa3cSEd Tanous // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 237af4edf68SEd Tanous if (isBoundaryChar(buffer[i + boundaryEnd])) 238af4edf68SEd Tanous { 239af4edf68SEd Tanous break; 240af4edf68SEd Tanous } 241af4edf68SEd Tanous i += boundary.size(); 242af4edf68SEd Tanous } 243af4edf68SEd Tanous } 244af4edf68SEd Tanous 2458a592810SEd Tanous void processPartData(size_t& prevIndex, const char* buffer, size_t& i, 2468a592810SEd Tanous char c) 247af4edf68SEd Tanous { 248af4edf68SEd Tanous prevIndex = index; 249af4edf68SEd Tanous 250af4edf68SEd Tanous if (index < boundary.size()) 251af4edf68SEd Tanous { 252af4edf68SEd Tanous if (boundary[index] == c) 253af4edf68SEd Tanous { 254af4edf68SEd Tanous if (index == 0) 255af4edf68SEd Tanous { 256ca45aa3cSEd Tanous // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 257ca45aa3cSEd Tanous const char* start = buffer + partDataMark; 258ca45aa3cSEd Tanous size_t size = i - partDataMark; 259ca45aa3cSEd Tanous mime_fields.rbegin()->content += 260ca45aa3cSEd Tanous std::string_view(start, size); 261af4edf68SEd Tanous } 262af4edf68SEd Tanous index++; 263af4edf68SEd Tanous } 264af4edf68SEd Tanous else 265af4edf68SEd Tanous { 266af4edf68SEd Tanous index = 0; 267af4edf68SEd Tanous } 268af4edf68SEd Tanous } 269af4edf68SEd Tanous else if (index == boundary.size()) 270af4edf68SEd Tanous { 271af4edf68SEd Tanous index++; 272af4edf68SEd Tanous if (c == cr) 273af4edf68SEd Tanous { 274af4edf68SEd Tanous // cr = part boundary 275af4edf68SEd Tanous flags = Boundary::PART_BOUNDARY; 276af4edf68SEd Tanous } 277af4edf68SEd Tanous else if (c == hyphen) 278af4edf68SEd Tanous { 279af4edf68SEd Tanous // hyphen = end boundary 280af4edf68SEd Tanous flags = Boundary::END_BOUNDARY; 281af4edf68SEd Tanous } 282af4edf68SEd Tanous else 283af4edf68SEd Tanous { 284af4edf68SEd Tanous index = 0; 285af4edf68SEd Tanous } 286af4edf68SEd Tanous } 287af4edf68SEd Tanous else 288af4edf68SEd Tanous { 289af4edf68SEd Tanous if (flags == Boundary::PART_BOUNDARY) 290af4edf68SEd Tanous { 291af4edf68SEd Tanous index = 0; 292af4edf68SEd Tanous if (c == lf) 293af4edf68SEd Tanous { 294af4edf68SEd Tanous // unset the PART_BOUNDARY flag 295af4edf68SEd Tanous flags = Boundary::NON_BOUNDARY; 296af4edf68SEd Tanous mime_fields.push_back({}); 297af4edf68SEd Tanous state = State::HEADER_FIELD_START; 298af4edf68SEd Tanous return; 299af4edf68SEd Tanous } 300af4edf68SEd Tanous } 301af4edf68SEd Tanous if (flags == Boundary::END_BOUNDARY) 302af4edf68SEd Tanous { 303af4edf68SEd Tanous if (c == hyphen) 304af4edf68SEd Tanous { 305af4edf68SEd Tanous state = State::END; 306af4edf68SEd Tanous } 307af4edf68SEd Tanous } 308af4edf68SEd Tanous } 309af4edf68SEd Tanous 310af4edf68SEd Tanous if (index > 0) 311af4edf68SEd Tanous { 312af4edf68SEd Tanous lookbehind[index - 1] = c; 313af4edf68SEd Tanous } 314af4edf68SEd Tanous else if (prevIndex > 0) 315af4edf68SEd Tanous { 316af4edf68SEd Tanous // if our boundary turned out to be rubbish, the captured 317af4edf68SEd Tanous // lookbehind belongs to partData 318af4edf68SEd Tanous 319af4edf68SEd Tanous mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex); 320af4edf68SEd Tanous prevIndex = 0; 321af4edf68SEd Tanous partDataMark = i; 322af4edf68SEd Tanous 323af4edf68SEd Tanous // reconsider the current character even so it interrupted 324af4edf68SEd Tanous // the sequence it could be the beginning of a new sequence 325af4edf68SEd Tanous i--; 326af4edf68SEd Tanous } 327af4edf68SEd Tanous } 328af4edf68SEd Tanous 329af4edf68SEd Tanous std::string currentHeaderName; 330af4edf68SEd Tanous std::string currentHeaderValue; 331af4edf68SEd Tanous 332af4edf68SEd Tanous static constexpr char cr = '\r'; 333af4edf68SEd Tanous static constexpr char lf = '\n'; 334af4edf68SEd Tanous static constexpr char space = ' '; 335af4edf68SEd Tanous static constexpr char hyphen = '-'; 336af4edf68SEd Tanous static constexpr char colon = ':'; 337af4edf68SEd Tanous 338d3a9e084SEd Tanous std::array<bool, 256> boundaryIndex{}; 339af4edf68SEd Tanous std::string lookbehind; 340d3a9e084SEd Tanous State state{State::START}; 341d3a9e084SEd Tanous Boundary flags{Boundary::NON_BOUNDARY}; 342af4edf68SEd Tanous size_t index = 0; 343af4edf68SEd Tanous size_t partDataMark = 0; 344af4edf68SEd Tanous size_t headerFieldMark = 0; 345af4edf68SEd Tanous size_t headerValueMark = 0; 346af4edf68SEd Tanous }; 347