1 #pragma once 2 3 #include <boost/beast/http/fields.hpp> 4 #include <http_request.hpp> 5 6 #include <string> 7 #include <string_view> 8 9 enum class ParserError 10 { 11 PARSER_SUCCESS, 12 ERROR_BOUNDARY_FORMAT, 13 ERROR_BOUNDARY_CR, 14 ERROR_BOUNDARY_LF, 15 ERROR_BOUNDARY_DATA, 16 ERROR_EMPTY_HEADER, 17 ERROR_HEADER_NAME, 18 ERROR_HEADER_VALUE, 19 ERROR_HEADER_ENDING, 20 ERROR_UNEXPECTED_END_OF_HEADER, 21 ERROR_UNEXPECTED_END_OF_INPUT, 22 ERROR_OUT_OF_RANGE 23 }; 24 25 enum class State 26 { 27 START, 28 START_BOUNDARY, 29 HEADER_FIELD_START, 30 HEADER_FIELD, 31 HEADER_VALUE_START, 32 HEADER_VALUE, 33 HEADER_VALUE_ALMOST_DONE, 34 HEADERS_ALMOST_DONE, 35 PART_DATA_START, 36 PART_DATA, 37 END 38 }; 39 40 enum class Boundary 41 { 42 NON_BOUNDARY, 43 PART_BOUNDARY, 44 END_BOUNDARY, 45 }; 46 47 struct FormPart 48 { 49 boost::beast::http::fields fields; 50 std::string content; 51 }; 52 53 class MultipartParser 54 { 55 public: 56 MultipartParser() = default; 57 58 [[nodiscard]] ParserError parse(const crow::Request& req) 59 { 60 std::string_view contentType = req.getHeaderValue("content-type"); 61 62 const std::string boundaryFormat = "multipart/form-data; boundary="; 63 if (!contentType.starts_with(boundaryFormat)) 64 { 65 return ParserError::ERROR_BOUNDARY_FORMAT; 66 } 67 68 std::string_view ctBoundary = contentType.substr(boundaryFormat.size()); 69 70 boundary = "\r\n--"; 71 boundary += ctBoundary; 72 indexBoundary(); 73 lookbehind.resize(boundary.size() + 8); 74 state = State::START; 75 76 const char* buffer = req.body.data(); 77 size_t len = req.body.size(); 78 char cl = 0; 79 80 for (size_t i = 0; i < len; i++) 81 { 82 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 83 char c = buffer[i]; 84 switch (state) 85 { 86 case State::START: 87 index = 0; 88 state = State::START_BOUNDARY; 89 [[fallthrough]]; 90 case State::START_BOUNDARY: 91 if (index == boundary.size() - 2) 92 { 93 if (c != cr) 94 { 95 return ParserError::ERROR_BOUNDARY_CR; 96 } 97 index++; 98 break; 99 } 100 else if (index - 1 == boundary.size() - 2) 101 { 102 if (c != lf) 103 { 104 return ParserError::ERROR_BOUNDARY_LF; 105 } 106 index = 0; 107 mime_fields.push_back({}); 108 state = State::HEADER_FIELD_START; 109 break; 110 } 111 if (c != boundary[index + 2]) 112 { 113 return ParserError::ERROR_BOUNDARY_DATA; 114 } 115 index++; 116 break; 117 case State::HEADER_FIELD_START: 118 currentHeaderName.resize(0); 119 state = State::HEADER_FIELD; 120 headerFieldMark = i; 121 index = 0; 122 [[fallthrough]]; 123 case State::HEADER_FIELD: 124 if (c == cr) 125 { 126 headerFieldMark = 0; 127 state = State::HEADERS_ALMOST_DONE; 128 break; 129 } 130 131 index++; 132 if (c == hyphen) 133 { 134 break; 135 } 136 137 if (c == colon) 138 { 139 if (index == 1) 140 { 141 return ParserError::ERROR_EMPTY_HEADER; 142 } 143 144 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 145 currentHeaderName.append(buffer + headerFieldMark, 146 i - headerFieldMark); 147 state = State::HEADER_VALUE_START; 148 break; 149 } 150 cl = lower(c); 151 if (cl < 'a' || cl > 'z') 152 { 153 return ParserError::ERROR_HEADER_NAME; 154 } 155 break; 156 case State::HEADER_VALUE_START: 157 if (c == space) 158 { 159 break; 160 } 161 headerValueMark = i; 162 state = State::HEADER_VALUE; 163 [[fallthrough]]; 164 case State::HEADER_VALUE: 165 if (c == cr) 166 { 167 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 168 std::string_view value(buffer + headerValueMark, 169 i - headerValueMark); 170 mime_fields.rbegin()->fields.set(currentHeaderName, 171 value); 172 state = State::HEADER_VALUE_ALMOST_DONE; 173 } 174 break; 175 case State::HEADER_VALUE_ALMOST_DONE: 176 if (c != lf) 177 { 178 return ParserError::ERROR_HEADER_VALUE; 179 } 180 state = State::HEADER_FIELD_START; 181 break; 182 case State::HEADERS_ALMOST_DONE: 183 if (c != lf) 184 { 185 return ParserError::ERROR_HEADER_ENDING; 186 } 187 if (index > 0) 188 { 189 return ParserError::ERROR_UNEXPECTED_END_OF_HEADER; 190 } 191 state = State::PART_DATA_START; 192 break; 193 case State::PART_DATA_START: 194 state = State::PART_DATA; 195 partDataMark = i; 196 [[fallthrough]]; 197 case State::PART_DATA: 198 { 199 if (index == 0) 200 { 201 skipNonBoundary(buffer, len, boundary.size() - 1, i); 202 203 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 204 c = buffer[i]; 205 } 206 const ParserError ec = processPartData(buffer, i, c); 207 if (ec != ParserError::PARSER_SUCCESS) 208 { 209 return ec; 210 } 211 break; 212 } 213 case State::END: 214 break; 215 } 216 } 217 218 if (state != State::END) 219 { 220 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT; 221 } 222 223 return ParserError::PARSER_SUCCESS; 224 } 225 std::vector<FormPart> mime_fields; 226 std::string boundary; 227 228 private: 229 void indexBoundary() 230 { 231 std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0); 232 for (const char current : boundary) 233 { 234 boundaryIndex[static_cast<unsigned char>(current)] = true; 235 } 236 } 237 238 static char lower(char c) 239 { 240 return static_cast<char>(c | 0x20); 241 } 242 243 inline bool isBoundaryChar(char c) const 244 { 245 return boundaryIndex[static_cast<unsigned char>(c)]; 246 } 247 248 void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd, 249 size_t& i) 250 { 251 // boyer-moore derived algorithm to safely skip non-boundary data 252 while (i + boundary.size() <= len) 253 { 254 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 255 if (isBoundaryChar(buffer[i + boundaryEnd])) 256 { 257 break; 258 } 259 i += boundary.size(); 260 } 261 } 262 263 ParserError processPartData(const char* buffer, size_t& i, char c) 264 { 265 size_t prevIndex = index; 266 267 if (index < boundary.size()) 268 { 269 if (boundary[index] == c) 270 { 271 if (index == 0) 272 { 273 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 274 const char* start = buffer + partDataMark; 275 size_t size = i - partDataMark; 276 mime_fields.rbegin()->content += 277 std::string_view(start, size); 278 } 279 index++; 280 } 281 else 282 { 283 index = 0; 284 } 285 } 286 else if (index == boundary.size()) 287 { 288 index++; 289 if (c == cr) 290 { 291 // cr = part boundary 292 flags = Boundary::PART_BOUNDARY; 293 } 294 else if (c == hyphen) 295 { 296 // hyphen = end boundary 297 flags = Boundary::END_BOUNDARY; 298 } 299 else 300 { 301 index = 0; 302 } 303 } 304 else 305 { 306 if (flags == Boundary::PART_BOUNDARY) 307 { 308 index = 0; 309 if (c == lf) 310 { 311 // unset the PART_BOUNDARY flag 312 flags = Boundary::NON_BOUNDARY; 313 mime_fields.push_back({}); 314 state = State::HEADER_FIELD_START; 315 return ParserError::PARSER_SUCCESS; 316 } 317 } 318 if (flags == Boundary::END_BOUNDARY) 319 { 320 if (c == hyphen) 321 { 322 state = State::END; 323 } 324 else 325 { 326 flags = Boundary::NON_BOUNDARY; 327 index = 0; 328 } 329 } 330 } 331 332 if (index > 0) 333 { 334 if ((index - 1) >= lookbehind.size()) 335 { 336 // Should never happen, but when it does it won't cause crash 337 return ParserError::ERROR_OUT_OF_RANGE; 338 } 339 lookbehind[index - 1] = c; 340 } 341 else if (prevIndex > 0) 342 { 343 // if our boundary turned out to be rubbish, the captured 344 // lookbehind belongs to partData 345 346 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex); 347 partDataMark = i; 348 349 // reconsider the current character even so it interrupted 350 // the sequence it could be the beginning of a new sequence 351 i--; 352 } 353 return ParserError::PARSER_SUCCESS; 354 } 355 356 std::string currentHeaderName; 357 std::string currentHeaderValue; 358 359 static constexpr char cr = '\r'; 360 static constexpr char lf = '\n'; 361 static constexpr char space = ' '; 362 static constexpr char hyphen = '-'; 363 static constexpr char colon = ':'; 364 365 std::array<bool, 256> boundaryIndex{}; 366 std::string lookbehind; 367 State state{State::START}; 368 Boundary flags{Boundary::NON_BOUNDARY}; 369 size_t index = 0; 370 size_t partDataMark = 0; 371 size_t headerFieldMark = 0; 372 size_t headerValueMark = 0; 373 }; 374