1 #pragma once 2 3 #include "http_request.hpp" 4 5 #include <boost/beast/http/fields.hpp> 6 7 #include <string> 8 #include <string_view> 9 10 enum class ParserError 11 { 12 PARSER_SUCCESS, 13 ERROR_BOUNDARY_FORMAT, 14 ERROR_BOUNDARY_CR, 15 ERROR_BOUNDARY_LF, 16 ERROR_BOUNDARY_DATA, 17 ERROR_EMPTY_HEADER, 18 ERROR_HEADER_NAME, 19 ERROR_HEADER_VALUE, 20 ERROR_HEADER_ENDING, 21 ERROR_UNEXPECTED_END_OF_HEADER, 22 ERROR_UNEXPECTED_END_OF_INPUT, 23 ERROR_OUT_OF_RANGE 24 }; 25 26 enum class State 27 { 28 START, 29 START_BOUNDARY, 30 HEADER_FIELD_START, 31 HEADER_FIELD, 32 HEADER_VALUE_START, 33 HEADER_VALUE, 34 HEADER_VALUE_ALMOST_DONE, 35 HEADERS_ALMOST_DONE, 36 PART_DATA_START, 37 PART_DATA, 38 END 39 }; 40 41 enum class Boundary 42 { 43 NON_BOUNDARY, 44 PART_BOUNDARY, 45 END_BOUNDARY, 46 }; 47 48 struct FormPart 49 { 50 boost::beast::http::fields fields; 51 std::string content; 52 }; 53 54 class MultipartParser 55 { 56 public: 57 MultipartParser() = default; 58 59 [[nodiscard]] ParserError parse(const crow::Request& req) 60 { 61 std::string_view contentType = req.getHeaderValue("content-type"); 62 63 const std::string boundaryFormat = "multipart/form-data; boundary="; 64 if (!contentType.starts_with(boundaryFormat)) 65 { 66 return ParserError::ERROR_BOUNDARY_FORMAT; 67 } 68 69 std::string_view ctBoundary = contentType.substr(boundaryFormat.size()); 70 71 boundary = "\r\n--"; 72 boundary += ctBoundary; 73 indexBoundary(); 74 lookbehind.resize(boundary.size() + 8); 75 state = State::START; 76 77 const char* buffer = req.body().data(); 78 size_t len = req.body().size(); 79 char cl = 0; 80 81 for (size_t i = 0; i < len; i++) 82 { 83 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 84 char c = buffer[i]; 85 switch (state) 86 { 87 case State::START: 88 index = 0; 89 state = State::START_BOUNDARY; 90 [[fallthrough]]; 91 case State::START_BOUNDARY: 92 if (index == boundary.size() - 2) 93 { 94 if (c != cr) 95 { 96 return ParserError::ERROR_BOUNDARY_CR; 97 } 98 index++; 99 break; 100 } 101 else if (index - 1 == boundary.size() - 2) 102 { 103 if (c != lf) 104 { 105 return ParserError::ERROR_BOUNDARY_LF; 106 } 107 index = 0; 108 mime_fields.push_back({}); 109 state = State::HEADER_FIELD_START; 110 break; 111 } 112 if (c != boundary[index + 2]) 113 { 114 return ParserError::ERROR_BOUNDARY_DATA; 115 } 116 index++; 117 break; 118 case State::HEADER_FIELD_START: 119 currentHeaderName.resize(0); 120 state = State::HEADER_FIELD; 121 headerFieldMark = i; 122 index = 0; 123 [[fallthrough]]; 124 case State::HEADER_FIELD: 125 if (c == cr) 126 { 127 headerFieldMark = 0; 128 state = State::HEADERS_ALMOST_DONE; 129 break; 130 } 131 132 index++; 133 if (c == hyphen) 134 { 135 break; 136 } 137 138 if (c == colon) 139 { 140 if (index == 1) 141 { 142 return ParserError::ERROR_EMPTY_HEADER; 143 } 144 145 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 146 currentHeaderName.append(buffer + headerFieldMark, 147 i - headerFieldMark); 148 state = State::HEADER_VALUE_START; 149 break; 150 } 151 cl = lower(c); 152 if (cl < 'a' || cl > 'z') 153 { 154 return ParserError::ERROR_HEADER_NAME; 155 } 156 break; 157 case State::HEADER_VALUE_START: 158 if (c == space) 159 { 160 break; 161 } 162 headerValueMark = i; 163 state = State::HEADER_VALUE; 164 [[fallthrough]]; 165 case State::HEADER_VALUE: 166 if (c == cr) 167 { 168 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 169 std::string_view value(buffer + headerValueMark, 170 i - headerValueMark); 171 mime_fields.rbegin()->fields.set(currentHeaderName, 172 value); 173 state = State::HEADER_VALUE_ALMOST_DONE; 174 } 175 break; 176 case State::HEADER_VALUE_ALMOST_DONE: 177 if (c != lf) 178 { 179 return ParserError::ERROR_HEADER_VALUE; 180 } 181 state = State::HEADER_FIELD_START; 182 break; 183 case State::HEADERS_ALMOST_DONE: 184 if (c != lf) 185 { 186 return ParserError::ERROR_HEADER_ENDING; 187 } 188 if (index > 0) 189 { 190 return ParserError::ERROR_UNEXPECTED_END_OF_HEADER; 191 } 192 state = State::PART_DATA_START; 193 break; 194 case State::PART_DATA_START: 195 state = State::PART_DATA; 196 partDataMark = i; 197 [[fallthrough]]; 198 case State::PART_DATA: 199 { 200 if (index == 0) 201 { 202 skipNonBoundary(buffer, len, boundary.size() - 1, i); 203 204 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 205 c = buffer[i]; 206 } 207 const ParserError ec = processPartData(buffer, i, c); 208 if (ec != ParserError::PARSER_SUCCESS) 209 { 210 return ec; 211 } 212 break; 213 } 214 case State::END: 215 break; 216 } 217 } 218 219 if (state != State::END) 220 { 221 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT; 222 } 223 224 return ParserError::PARSER_SUCCESS; 225 } 226 std::vector<FormPart> mime_fields; 227 std::string boundary; 228 229 private: 230 void indexBoundary() 231 { 232 std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0); 233 for (const char current : boundary) 234 { 235 boundaryIndex[static_cast<unsigned char>(current)] = true; 236 } 237 } 238 239 static char lower(char c) 240 { 241 return static_cast<char>(c | 0x20); 242 } 243 244 inline bool isBoundaryChar(char c) const 245 { 246 return boundaryIndex[static_cast<unsigned char>(c)]; 247 } 248 249 void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd, 250 size_t& i) 251 { 252 // boyer-moore derived algorithm to safely skip non-boundary data 253 while (i + boundary.size() <= len) 254 { 255 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 256 if (isBoundaryChar(buffer[i + boundaryEnd])) 257 { 258 break; 259 } 260 i += boundary.size(); 261 } 262 } 263 264 ParserError processPartData(const char* buffer, size_t& i, char c) 265 { 266 size_t prevIndex = index; 267 268 if (index < boundary.size()) 269 { 270 if (boundary[index] == c) 271 { 272 if (index == 0) 273 { 274 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 275 const char* start = buffer + partDataMark; 276 size_t size = i - partDataMark; 277 mime_fields.rbegin()->content += std::string_view(start, 278 size); 279 } 280 index++; 281 } 282 else 283 { 284 index = 0; 285 } 286 } 287 else if (index == boundary.size()) 288 { 289 index++; 290 if (c == cr) 291 { 292 // cr = part boundary 293 flags = Boundary::PART_BOUNDARY; 294 } 295 else if (c == hyphen) 296 { 297 // hyphen = end boundary 298 flags = Boundary::END_BOUNDARY; 299 } 300 else 301 { 302 index = 0; 303 } 304 } 305 else 306 { 307 if (flags == Boundary::PART_BOUNDARY) 308 { 309 index = 0; 310 if (c == lf) 311 { 312 // unset the PART_BOUNDARY flag 313 flags = Boundary::NON_BOUNDARY; 314 mime_fields.push_back({}); 315 state = State::HEADER_FIELD_START; 316 return ParserError::PARSER_SUCCESS; 317 } 318 } 319 if (flags == Boundary::END_BOUNDARY) 320 { 321 if (c == hyphen) 322 { 323 state = State::END; 324 } 325 else 326 { 327 flags = Boundary::NON_BOUNDARY; 328 index = 0; 329 } 330 } 331 } 332 333 if (index > 0) 334 { 335 if ((index - 1) >= lookbehind.size()) 336 { 337 // Should never happen, but when it does it won't cause crash 338 return ParserError::ERROR_OUT_OF_RANGE; 339 } 340 lookbehind[index - 1] = c; 341 } 342 else if (prevIndex > 0) 343 { 344 // if our boundary turned out to be rubbish, the captured 345 // lookbehind belongs to partData 346 347 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex); 348 partDataMark = i; 349 350 // reconsider the current character even so it interrupted 351 // the sequence it could be the beginning of a new sequence 352 i--; 353 } 354 return ParserError::PARSER_SUCCESS; 355 } 356 357 std::string currentHeaderName; 358 std::string currentHeaderValue; 359 360 static constexpr char cr = '\r'; 361 static constexpr char lf = '\n'; 362 static constexpr char space = ' '; 363 static constexpr char hyphen = '-'; 364 static constexpr char colon = ':'; 365 366 std::array<bool, 256> boundaryIndex{}; 367 std::string lookbehind; 368 State state{State::START}; 369 Boundary flags{Boundary::NON_BOUNDARY}; 370 size_t index = 0; 371 size_t partDataMark = 0; 372 size_t headerFieldMark = 0; 373 size_t headerValueMark = 0; 374 }; 375