1af4edf68SEd Tanous #pragma once 2af4edf68SEd Tanous 3af4edf68SEd Tanous #include <boost/algorithm/string/predicate.hpp> 4af4edf68SEd Tanous #include <boost/beast/http/fields.hpp> 5af4edf68SEd Tanous #include <http_request.hpp> 6af4edf68SEd Tanous 7af4edf68SEd Tanous #include <string> 8af4edf68SEd Tanous #include <string_view> 9af4edf68SEd Tanous 10af4edf68SEd Tanous enum class ParserError 11af4edf68SEd Tanous { 12af4edf68SEd Tanous PARSER_SUCCESS, 13af4edf68SEd Tanous ERROR_BOUNDARY_FORMAT, 14af4edf68SEd Tanous ERROR_BOUNDARY_CR, 15af4edf68SEd Tanous ERROR_BOUNDARY_LF, 16af4edf68SEd Tanous ERROR_BOUNDARY_DATA, 17af4edf68SEd Tanous ERROR_EMPTY_HEADER, 18af4edf68SEd Tanous ERROR_HEADER_NAME, 19af4edf68SEd Tanous ERROR_HEADER_VALUE, 20af4edf68SEd Tanous ERROR_HEADER_ENDING 21af4edf68SEd Tanous }; 22af4edf68SEd Tanous 23af4edf68SEd Tanous enum class State 24af4edf68SEd Tanous { 25af4edf68SEd Tanous START, 26af4edf68SEd Tanous START_BOUNDARY, 27af4edf68SEd Tanous HEADER_FIELD_START, 28af4edf68SEd Tanous HEADER_FIELD, 29af4edf68SEd Tanous HEADER_VALUE_START, 30af4edf68SEd Tanous HEADER_VALUE, 31af4edf68SEd Tanous HEADER_VALUE_ALMOST_DONE, 32af4edf68SEd Tanous HEADERS_ALMOST_DONE, 33af4edf68SEd Tanous PART_DATA_START, 34af4edf68SEd Tanous PART_DATA, 35af4edf68SEd Tanous END 36af4edf68SEd Tanous }; 37af4edf68SEd Tanous 38af4edf68SEd Tanous enum class Boundary 39af4edf68SEd Tanous { 40af4edf68SEd Tanous NON_BOUNDARY, 41af4edf68SEd Tanous PART_BOUNDARY, 42af4edf68SEd Tanous END_BOUNDARY, 43af4edf68SEd Tanous }; 44af4edf68SEd Tanous 45af4edf68SEd Tanous struct FormPart 46af4edf68SEd Tanous { 47af4edf68SEd Tanous boost::beast::http::fields fields; 48af4edf68SEd Tanous std::string content; 49af4edf68SEd Tanous }; 50af4edf68SEd Tanous 51af4edf68SEd Tanous class MultipartParser 52af4edf68SEd Tanous { 53af4edf68SEd Tanous public: 54af4edf68SEd Tanous MultipartParser() = default; 55af4edf68SEd Tanous 56af4edf68SEd Tanous [[nodiscard]] ParserError parse(const crow::Request& req) 57af4edf68SEd Tanous { 58af4edf68SEd Tanous std::string_view contentType = req.getHeaderValue("content-type"); 59af4edf68SEd Tanous 60af4edf68SEd Tanous const std::string boundaryFormat = "multipart/form-data; boundary="; 61af4edf68SEd Tanous if (!boost::starts_with(req.getHeaderValue("content-type"), 62af4edf68SEd Tanous boundaryFormat)) 63af4edf68SEd Tanous { 64af4edf68SEd Tanous return ParserError::ERROR_BOUNDARY_FORMAT; 65af4edf68SEd Tanous } 66af4edf68SEd Tanous 67af4edf68SEd Tanous std::string_view ctBoundary = contentType.substr(boundaryFormat.size()); 68af4edf68SEd Tanous 69af4edf68SEd Tanous boundary = "\r\n--"; 70af4edf68SEd Tanous boundary += ctBoundary; 71af4edf68SEd Tanous indexBoundary(); 72af4edf68SEd Tanous lookbehind.resize(boundary.size() + 8); 73af4edf68SEd Tanous state = State::START; 74af4edf68SEd Tanous 75af4edf68SEd Tanous const char* buffer = req.body.data(); 76af4edf68SEd Tanous size_t len = req.body.size(); 77af4edf68SEd Tanous size_t prevIndex = index; 78af4edf68SEd Tanous char cl = 0; 79af4edf68SEd Tanous 80af4edf68SEd Tanous for (size_t i = 0; i < len; i++) 81af4edf68SEd Tanous { 82*ca45aa3cSEd Tanous // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 83af4edf68SEd Tanous char c = buffer[i]; 84af4edf68SEd Tanous switch (state) 85af4edf68SEd Tanous { 86af4edf68SEd Tanous case State::START: 87af4edf68SEd Tanous index = 0; 88af4edf68SEd Tanous state = State::START_BOUNDARY; 89af4edf68SEd Tanous [[fallthrough]]; 90af4edf68SEd Tanous case State::START_BOUNDARY: 91af4edf68SEd Tanous if (index == boundary.size() - 2) 92af4edf68SEd Tanous { 93af4edf68SEd Tanous if (c != cr) 94af4edf68SEd Tanous { 95af4edf68SEd Tanous return ParserError::ERROR_BOUNDARY_CR; 96af4edf68SEd Tanous } 97af4edf68SEd Tanous index++; 98af4edf68SEd Tanous break; 99af4edf68SEd Tanous } 100af4edf68SEd Tanous else if (index - 1 == boundary.size() - 2) 101af4edf68SEd Tanous { 102af4edf68SEd Tanous if (c != lf) 103af4edf68SEd Tanous { 104af4edf68SEd Tanous return ParserError::ERROR_BOUNDARY_LF; 105af4edf68SEd Tanous } 106af4edf68SEd Tanous index = 0; 107af4edf68SEd Tanous mime_fields.push_back({}); 108af4edf68SEd Tanous state = State::HEADER_FIELD_START; 109af4edf68SEd Tanous break; 110af4edf68SEd Tanous } 111af4edf68SEd Tanous if (c != boundary[index + 2]) 112af4edf68SEd Tanous { 113af4edf68SEd Tanous return ParserError::ERROR_BOUNDARY_DATA; 114af4edf68SEd Tanous } 115af4edf68SEd Tanous index++; 116af4edf68SEd Tanous break; 117af4edf68SEd Tanous case State::HEADER_FIELD_START: 118af4edf68SEd Tanous currentHeaderName.resize(0); 119af4edf68SEd Tanous state = State::HEADER_FIELD; 120af4edf68SEd Tanous headerFieldMark = i; 121af4edf68SEd Tanous index = 0; 122af4edf68SEd Tanous [[fallthrough]]; 123af4edf68SEd Tanous case State::HEADER_FIELD: 124af4edf68SEd Tanous if (c == cr) 125af4edf68SEd Tanous { 126af4edf68SEd Tanous headerFieldMark = 0; 127af4edf68SEd Tanous state = State::HEADERS_ALMOST_DONE; 128af4edf68SEd Tanous break; 129af4edf68SEd Tanous } 130af4edf68SEd Tanous 131af4edf68SEd Tanous index++; 132af4edf68SEd Tanous if (c == hyphen) 133af4edf68SEd Tanous { 134af4edf68SEd Tanous break; 135af4edf68SEd Tanous } 136af4edf68SEd Tanous 137af4edf68SEd Tanous if (c == colon) 138af4edf68SEd Tanous { 139af4edf68SEd Tanous if (index == 1) 140af4edf68SEd Tanous { 141af4edf68SEd Tanous return ParserError::ERROR_EMPTY_HEADER; 142af4edf68SEd Tanous } 143*ca45aa3cSEd Tanous 144*ca45aa3cSEd Tanous // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 145af4edf68SEd Tanous currentHeaderName.append(buffer + headerFieldMark, 146af4edf68SEd Tanous i - headerFieldMark); 147af4edf68SEd Tanous state = State::HEADER_VALUE_START; 148af4edf68SEd Tanous break; 149af4edf68SEd Tanous } 150af4edf68SEd Tanous cl = lower(c); 151af4edf68SEd Tanous if (cl < 'a' || cl > 'z') 152af4edf68SEd Tanous { 153af4edf68SEd Tanous return ParserError::ERROR_HEADER_NAME; 154af4edf68SEd Tanous } 155af4edf68SEd Tanous break; 156af4edf68SEd Tanous case State::HEADER_VALUE_START: 157af4edf68SEd Tanous if (c == space) 158af4edf68SEd Tanous { 159af4edf68SEd Tanous break; 160af4edf68SEd Tanous } 161af4edf68SEd Tanous headerValueMark = i; 162af4edf68SEd Tanous state = State::HEADER_VALUE; 163af4edf68SEd Tanous [[fallthrough]]; 164af4edf68SEd Tanous case State::HEADER_VALUE: 165af4edf68SEd Tanous if (c == cr) 166af4edf68SEd Tanous { 167*ca45aa3cSEd Tanous // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 168af4edf68SEd Tanous std::string_view value(buffer + headerValueMark, 169af4edf68SEd Tanous i - headerValueMark); 170af4edf68SEd Tanous mime_fields.rbegin()->fields.set(currentHeaderName, 171af4edf68SEd Tanous value); 172af4edf68SEd Tanous state = State::HEADER_VALUE_ALMOST_DONE; 173af4edf68SEd Tanous } 174af4edf68SEd Tanous break; 175af4edf68SEd Tanous case State::HEADER_VALUE_ALMOST_DONE: 176af4edf68SEd Tanous if (c != lf) 177af4edf68SEd Tanous { 178af4edf68SEd Tanous return ParserError::ERROR_HEADER_VALUE; 179af4edf68SEd Tanous } 180af4edf68SEd Tanous state = State::HEADER_FIELD_START; 181af4edf68SEd Tanous break; 182af4edf68SEd Tanous case State::HEADERS_ALMOST_DONE: 183af4edf68SEd Tanous if (c != lf) 184af4edf68SEd Tanous { 185af4edf68SEd Tanous return ParserError::ERROR_HEADER_ENDING; 186af4edf68SEd Tanous } 187af4edf68SEd Tanous state = State::PART_DATA_START; 188af4edf68SEd Tanous break; 189af4edf68SEd Tanous case State::PART_DATA_START: 190af4edf68SEd Tanous state = State::PART_DATA; 191af4edf68SEd Tanous partDataMark = i; 192af4edf68SEd Tanous [[fallthrough]]; 193af4edf68SEd Tanous case State::PART_DATA: 194af4edf68SEd Tanous if (index == 0) 195af4edf68SEd Tanous { 196af4edf68SEd Tanous skipNonBoundary(buffer, len, boundary.size() - 1, i); 197*ca45aa3cSEd Tanous 198*ca45aa3cSEd Tanous // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 199af4edf68SEd Tanous c = buffer[i]; 200af4edf68SEd Tanous } 201af4edf68SEd Tanous processPartData(prevIndex, index, buffer, i, c, state); 202af4edf68SEd Tanous break; 203af4edf68SEd Tanous case State::END: 204af4edf68SEd Tanous break; 205af4edf68SEd Tanous } 206af4edf68SEd Tanous } 207af4edf68SEd Tanous return ParserError::PARSER_SUCCESS; 208af4edf68SEd Tanous } 209af4edf68SEd Tanous std::vector<FormPart> mime_fields; 210af4edf68SEd Tanous std::string boundary; 211af4edf68SEd Tanous 212af4edf68SEd Tanous private: 213af4edf68SEd Tanous void indexBoundary() 214af4edf68SEd Tanous { 215af4edf68SEd Tanous std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0); 216af4edf68SEd Tanous for (const char current : boundary) 217af4edf68SEd Tanous { 218af4edf68SEd Tanous boundaryIndex[static_cast<unsigned char>(current)] = true; 219af4edf68SEd Tanous } 220af4edf68SEd Tanous } 221af4edf68SEd Tanous 222af4edf68SEd Tanous char lower(char c) const 223af4edf68SEd Tanous { 224af4edf68SEd Tanous return static_cast<char>(c | 0x20); 225af4edf68SEd Tanous } 226af4edf68SEd Tanous 227af4edf68SEd Tanous inline bool isBoundaryChar(char c) const 228af4edf68SEd Tanous { 229af4edf68SEd Tanous return boundaryIndex[static_cast<unsigned char>(c)]; 230af4edf68SEd Tanous } 231af4edf68SEd Tanous 232af4edf68SEd Tanous void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd, 233af4edf68SEd Tanous size_t& i) 234af4edf68SEd Tanous { 235af4edf68SEd Tanous // boyer-moore derived algorithm to safely skip non-boundary data 236af4edf68SEd Tanous while (i + boundary.size() <= len) 237af4edf68SEd Tanous { 238*ca45aa3cSEd Tanous // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 239af4edf68SEd Tanous if (isBoundaryChar(buffer[i + boundaryEnd])) 240af4edf68SEd Tanous { 241af4edf68SEd Tanous break; 242af4edf68SEd Tanous } 243af4edf68SEd Tanous i += boundary.size(); 244af4edf68SEd Tanous } 245af4edf68SEd Tanous } 246af4edf68SEd Tanous 247af4edf68SEd Tanous void processPartData(size_t& prevIndex, size_t& index, const char* buffer, 248af4edf68SEd Tanous size_t& i, char c, State& state) 249af4edf68SEd Tanous { 250af4edf68SEd Tanous prevIndex = index; 251af4edf68SEd Tanous 252af4edf68SEd Tanous if (index < boundary.size()) 253af4edf68SEd Tanous { 254af4edf68SEd Tanous if (boundary[index] == c) 255af4edf68SEd Tanous { 256af4edf68SEd Tanous if (index == 0) 257af4edf68SEd Tanous { 258*ca45aa3cSEd Tanous // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 259*ca45aa3cSEd Tanous const char* start = buffer + partDataMark; 260*ca45aa3cSEd Tanous size_t size = i - partDataMark; 261*ca45aa3cSEd Tanous mime_fields.rbegin()->content += 262*ca45aa3cSEd Tanous std::string_view(start, size); 263af4edf68SEd Tanous } 264af4edf68SEd Tanous index++; 265af4edf68SEd Tanous } 266af4edf68SEd Tanous else 267af4edf68SEd Tanous { 268af4edf68SEd Tanous index = 0; 269af4edf68SEd Tanous } 270af4edf68SEd Tanous } 271af4edf68SEd Tanous else if (index == boundary.size()) 272af4edf68SEd Tanous { 273af4edf68SEd Tanous index++; 274af4edf68SEd Tanous if (c == cr) 275af4edf68SEd Tanous { 276af4edf68SEd Tanous // cr = part boundary 277af4edf68SEd Tanous flags = Boundary::PART_BOUNDARY; 278af4edf68SEd Tanous } 279af4edf68SEd Tanous else if (c == hyphen) 280af4edf68SEd Tanous { 281af4edf68SEd Tanous // hyphen = end boundary 282af4edf68SEd Tanous flags = Boundary::END_BOUNDARY; 283af4edf68SEd Tanous } 284af4edf68SEd Tanous else 285af4edf68SEd Tanous { 286af4edf68SEd Tanous index = 0; 287af4edf68SEd Tanous } 288af4edf68SEd Tanous } 289af4edf68SEd Tanous else 290af4edf68SEd Tanous { 291af4edf68SEd Tanous if (flags == Boundary::PART_BOUNDARY) 292af4edf68SEd Tanous { 293af4edf68SEd Tanous index = 0; 294af4edf68SEd Tanous if (c == lf) 295af4edf68SEd Tanous { 296af4edf68SEd Tanous // unset the PART_BOUNDARY flag 297af4edf68SEd Tanous flags = Boundary::NON_BOUNDARY; 298af4edf68SEd Tanous mime_fields.push_back({}); 299af4edf68SEd Tanous state = State::HEADER_FIELD_START; 300af4edf68SEd Tanous return; 301af4edf68SEd Tanous } 302af4edf68SEd Tanous } 303af4edf68SEd Tanous if (flags == Boundary::END_BOUNDARY) 304af4edf68SEd Tanous { 305af4edf68SEd Tanous if (c == hyphen) 306af4edf68SEd Tanous { 307af4edf68SEd Tanous state = State::END; 308af4edf68SEd Tanous } 309af4edf68SEd Tanous } 310af4edf68SEd Tanous } 311af4edf68SEd Tanous 312af4edf68SEd Tanous if (index > 0) 313af4edf68SEd Tanous { 314af4edf68SEd Tanous lookbehind[index - 1] = c; 315af4edf68SEd Tanous } 316af4edf68SEd Tanous else if (prevIndex > 0) 317af4edf68SEd Tanous { 318af4edf68SEd Tanous // if our boundary turned out to be rubbish, the captured 319af4edf68SEd Tanous // lookbehind belongs to partData 320af4edf68SEd Tanous 321af4edf68SEd Tanous mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex); 322af4edf68SEd Tanous prevIndex = 0; 323af4edf68SEd Tanous partDataMark = i; 324af4edf68SEd Tanous 325af4edf68SEd Tanous // reconsider the current character even so it interrupted 326af4edf68SEd Tanous // the sequence it could be the beginning of a new sequence 327af4edf68SEd Tanous i--; 328af4edf68SEd Tanous } 329af4edf68SEd Tanous } 330af4edf68SEd Tanous 331af4edf68SEd Tanous std::string currentHeaderName; 332af4edf68SEd Tanous std::string currentHeaderValue; 333af4edf68SEd Tanous 334af4edf68SEd Tanous static constexpr char cr = '\r'; 335af4edf68SEd Tanous static constexpr char lf = '\n'; 336af4edf68SEd Tanous static constexpr char space = ' '; 337af4edf68SEd Tanous static constexpr char hyphen = '-'; 338af4edf68SEd Tanous static constexpr char colon = ':'; 339af4edf68SEd Tanous 340af4edf68SEd Tanous std::array<bool, 256> boundaryIndex; 341af4edf68SEd Tanous std::string lookbehind; 342af4edf68SEd Tanous State state; 343af4edf68SEd Tanous Boundary flags; 344af4edf68SEd Tanous size_t index = 0; 345af4edf68SEd Tanous size_t partDataMark = 0; 346af4edf68SEd Tanous size_t headerFieldMark = 0; 347af4edf68SEd Tanous size_t headerValueMark = 0; 348af4edf68SEd Tanous }; 349