1 #pragma once 2 3 #include "http_request.hpp" 4 5 #include <boost/beast/http/fields.hpp> 6 7 #include <string> 8 #include <string_view> 9 10 enum class ParserError 11 { 12 PARSER_SUCCESS, 13 ERROR_BOUNDARY_FORMAT, 14 ERROR_BOUNDARY_CR, 15 ERROR_BOUNDARY_LF, 16 ERROR_BOUNDARY_DATA, 17 ERROR_EMPTY_HEADER, 18 ERROR_HEADER_NAME, 19 ERROR_HEADER_VALUE, 20 ERROR_HEADER_ENDING, 21 ERROR_UNEXPECTED_END_OF_HEADER, 22 ERROR_UNEXPECTED_END_OF_INPUT, 23 ERROR_OUT_OF_RANGE 24 }; 25 26 enum class State 27 { 28 START, 29 START_BOUNDARY, 30 HEADER_FIELD_START, 31 HEADER_FIELD, 32 HEADER_VALUE_START, 33 HEADER_VALUE, 34 HEADER_VALUE_ALMOST_DONE, 35 HEADERS_ALMOST_DONE, 36 PART_DATA_START, 37 PART_DATA, 38 END 39 }; 40 41 enum class Boundary 42 { 43 NON_BOUNDARY, 44 PART_BOUNDARY, 45 END_BOUNDARY, 46 }; 47 48 struct FormPart 49 { 50 boost::beast::http::fields fields; 51 std::string content; 52 }; 53 54 class MultipartParser 55 { 56 public: 57 MultipartParser() = default; 58 59 [[nodiscard]] ParserError parse(const crow::Request& req) 60 { 61 std::string_view contentType = req.getHeaderValue("content-type"); 62 63 const std::string boundaryFormat = "multipart/form-data; boundary="; 64 if (!contentType.starts_with(boundaryFormat)) 65 { 66 return ParserError::ERROR_BOUNDARY_FORMAT; 67 } 68 69 std::string_view ctBoundary = contentType.substr(boundaryFormat.size()); 70 71 boundary = "\r\n--"; 72 boundary += ctBoundary; 73 indexBoundary(); 74 lookbehind.resize(boundary.size() + 8); 75 state = State::START; 76 77 const std::string& buffer = req.body(); 78 size_t len = buffer.size(); 79 char cl = 0; 80 81 for (size_t i = 0; i < len; i++) 82 { 83 char c = buffer[i]; 84 switch (state) 85 { 86 case State::START: 87 index = 0; 88 state = State::START_BOUNDARY; 89 [[fallthrough]]; 90 case State::START_BOUNDARY: 91 if (index == boundary.size() - 2) 92 { 93 if (c != cr) 94 { 95 return ParserError::ERROR_BOUNDARY_CR; 96 } 97 index++; 98 break; 99 } 100 else if (index - 1 == boundary.size() - 2) 101 { 102 if (c != lf) 103 { 104 return ParserError::ERROR_BOUNDARY_LF; 105 } 106 index = 0; 107 mime_fields.push_back({}); 108 state = State::HEADER_FIELD_START; 109 break; 110 } 111 if (c != boundary[index + 2]) 112 { 113 return ParserError::ERROR_BOUNDARY_DATA; 114 } 115 index++; 116 break; 117 case State::HEADER_FIELD_START: 118 currentHeaderName.resize(0); 119 state = State::HEADER_FIELD; 120 headerFieldMark = i; 121 index = 0; 122 [[fallthrough]]; 123 case State::HEADER_FIELD: 124 if (c == cr) 125 { 126 headerFieldMark = 0; 127 state = State::HEADERS_ALMOST_DONE; 128 break; 129 } 130 131 index++; 132 if (c == hyphen) 133 { 134 break; 135 } 136 137 if (c == colon) 138 { 139 if (index == 1) 140 { 141 return ParserError::ERROR_EMPTY_HEADER; 142 } 143 144 currentHeaderName.append(&buffer[headerFieldMark], 145 i - headerFieldMark); 146 state = State::HEADER_VALUE_START; 147 break; 148 } 149 cl = lower(c); 150 if (cl < 'a' || cl > 'z') 151 { 152 return ParserError::ERROR_HEADER_NAME; 153 } 154 break; 155 case State::HEADER_VALUE_START: 156 if (c == space) 157 { 158 break; 159 } 160 headerValueMark = i; 161 state = State::HEADER_VALUE; 162 [[fallthrough]]; 163 case State::HEADER_VALUE: 164 if (c == cr) 165 { 166 std::string_view value(&buffer[headerValueMark], 167 i - headerValueMark); 168 mime_fields.rbegin()->fields.set(currentHeaderName, 169 value); 170 state = State::HEADER_VALUE_ALMOST_DONE; 171 } 172 break; 173 case State::HEADER_VALUE_ALMOST_DONE: 174 if (c != lf) 175 { 176 return ParserError::ERROR_HEADER_VALUE; 177 } 178 state = State::HEADER_FIELD_START; 179 break; 180 case State::HEADERS_ALMOST_DONE: 181 if (c != lf) 182 { 183 return ParserError::ERROR_HEADER_ENDING; 184 } 185 if (index > 0) 186 { 187 return ParserError::ERROR_UNEXPECTED_END_OF_HEADER; 188 } 189 state = State::PART_DATA_START; 190 break; 191 case State::PART_DATA_START: 192 state = State::PART_DATA; 193 partDataMark = i; 194 [[fallthrough]]; 195 case State::PART_DATA: 196 { 197 if (index == 0) 198 { 199 skipNonBoundary(buffer, boundary.size() - 1, i); 200 c = buffer[i]; 201 } 202 if (auto ec = processPartData(buffer, i, c); 203 ec != ParserError::PARSER_SUCCESS) 204 { 205 return ec; 206 } 207 break; 208 } 209 case State::END: 210 break; 211 } 212 } 213 214 if (state != State::END) 215 { 216 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT; 217 } 218 219 return ParserError::PARSER_SUCCESS; 220 } 221 std::vector<FormPart> mime_fields; 222 std::string boundary; 223 224 private: 225 void indexBoundary() 226 { 227 std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0); 228 for (const char current : boundary) 229 { 230 boundaryIndex[static_cast<unsigned char>(current)] = true; 231 } 232 } 233 234 static char lower(char c) 235 { 236 return static_cast<char>(c | 0x20); 237 } 238 239 inline bool isBoundaryChar(char c) const 240 { 241 return boundaryIndex[static_cast<unsigned char>(c)]; 242 } 243 244 void skipNonBoundary(const std::string& buffer, size_t boundaryEnd, 245 size_t& i) 246 { 247 // boyer-moore derived algorithm to safely skip non-boundary data 248 while (i + boundary.size() <= buffer.length()) 249 { 250 if (isBoundaryChar(buffer[i + boundaryEnd])) 251 { 252 break; 253 } 254 i += boundary.size(); 255 } 256 } 257 258 ParserError processPartData(const std::string& buffer, size_t& i, char c) 259 { 260 size_t prevIndex = index; 261 262 if (index < boundary.size()) 263 { 264 if (boundary[index] == c) 265 { 266 if (index == 0) 267 { 268 const char* start = &buffer[partDataMark]; 269 size_t size = i - partDataMark; 270 mime_fields.rbegin()->content += std::string_view(start, 271 size); 272 } 273 index++; 274 } 275 else 276 { 277 index = 0; 278 } 279 } 280 else if (index == boundary.size()) 281 { 282 index++; 283 if (c == cr) 284 { 285 // cr = part boundary 286 flags = Boundary::PART_BOUNDARY; 287 } 288 else if (c == hyphen) 289 { 290 // hyphen = end boundary 291 flags = Boundary::END_BOUNDARY; 292 } 293 else 294 { 295 index = 0; 296 } 297 } 298 else 299 { 300 if (flags == Boundary::PART_BOUNDARY) 301 { 302 index = 0; 303 if (c == lf) 304 { 305 // unset the PART_BOUNDARY flag 306 flags = Boundary::NON_BOUNDARY; 307 mime_fields.push_back({}); 308 state = State::HEADER_FIELD_START; 309 return ParserError::PARSER_SUCCESS; 310 } 311 } 312 if (flags == Boundary::END_BOUNDARY) 313 { 314 if (c == hyphen) 315 { 316 state = State::END; 317 } 318 else 319 { 320 flags = Boundary::NON_BOUNDARY; 321 index = 0; 322 } 323 } 324 } 325 326 if (index > 0) 327 { 328 if ((index - 1) >= lookbehind.size()) 329 { 330 // Should never happen, but when it does it won't cause crash 331 return ParserError::ERROR_OUT_OF_RANGE; 332 } 333 lookbehind[index - 1] = c; 334 } 335 else if (prevIndex > 0) 336 { 337 // if our boundary turned out to be rubbish, the captured 338 // lookbehind belongs to partData 339 340 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex); 341 partDataMark = i; 342 343 // reconsider the current character even so it interrupted 344 // the sequence it could be the beginning of a new sequence 345 i--; 346 } 347 return ParserError::PARSER_SUCCESS; 348 } 349 350 std::string currentHeaderName; 351 std::string currentHeaderValue; 352 353 static constexpr char cr = '\r'; 354 static constexpr char lf = '\n'; 355 static constexpr char space = ' '; 356 static constexpr char hyphen = '-'; 357 static constexpr char colon = ':'; 358 359 std::array<bool, 256> boundaryIndex{}; 360 std::string lookbehind; 361 State state{State::START}; 362 Boundary flags{Boundary::NON_BOUNDARY}; 363 size_t index = 0; 364 size_t partDataMark = 0; 365 size_t headerFieldMark = 0; 366 size_t headerValueMark = 0; 367 }; 368