1 #pragma once 2 3 #include <boost/algorithm/string/predicate.hpp> 4 #include <boost/beast/http/fields.hpp> 5 #include <http_request.hpp> 6 7 #include <string> 8 #include <string_view> 9 10 enum class ParserError 11 { 12 PARSER_SUCCESS, 13 ERROR_BOUNDARY_FORMAT, 14 ERROR_BOUNDARY_CR, 15 ERROR_BOUNDARY_LF, 16 ERROR_BOUNDARY_DATA, 17 ERROR_EMPTY_HEADER, 18 ERROR_HEADER_NAME, 19 ERROR_HEADER_VALUE, 20 ERROR_HEADER_ENDING 21 }; 22 23 enum class State 24 { 25 START, 26 START_BOUNDARY, 27 HEADER_FIELD_START, 28 HEADER_FIELD, 29 HEADER_VALUE_START, 30 HEADER_VALUE, 31 HEADER_VALUE_ALMOST_DONE, 32 HEADERS_ALMOST_DONE, 33 PART_DATA_START, 34 PART_DATA, 35 END 36 }; 37 38 enum class Boundary 39 { 40 NON_BOUNDARY, 41 PART_BOUNDARY, 42 END_BOUNDARY, 43 }; 44 45 struct FormPart 46 { 47 boost::beast::http::fields fields; 48 std::string content; 49 }; 50 51 class MultipartParser 52 { 53 public: 54 MultipartParser() = default; 55 56 [[nodiscard]] ParserError parse(const crow::Request& req) 57 { 58 std::string_view contentType = req.getHeaderValue("content-type"); 59 60 const std::string boundaryFormat = "multipart/form-data; boundary="; 61 if (!boost::starts_with(req.getHeaderValue("content-type"), 62 boundaryFormat)) 63 { 64 return ParserError::ERROR_BOUNDARY_FORMAT; 65 } 66 67 std::string_view ctBoundary = contentType.substr(boundaryFormat.size()); 68 69 boundary = "\r\n--"; 70 boundary += ctBoundary; 71 indexBoundary(); 72 lookbehind.resize(boundary.size() + 8); 73 state = State::START; 74 75 const char* buffer = req.body.data(); 76 size_t len = req.body.size(); 77 size_t prevIndex = index; 78 char cl = 0; 79 80 for (size_t i = 0; i < len; i++) 81 { 82 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 83 char c = buffer[i]; 84 switch (state) 85 { 86 case State::START: 87 index = 0; 88 state = State::START_BOUNDARY; 89 [[fallthrough]]; 90 case State::START_BOUNDARY: 91 if (index == boundary.size() - 2) 92 { 93 if (c != cr) 94 { 95 return ParserError::ERROR_BOUNDARY_CR; 96 } 97 index++; 98 break; 99 } 100 else if (index - 1 == boundary.size() - 2) 101 { 102 if (c != lf) 103 { 104 return ParserError::ERROR_BOUNDARY_LF; 105 } 106 index = 0; 107 mime_fields.push_back({}); 108 state = State::HEADER_FIELD_START; 109 break; 110 } 111 if (c != boundary[index + 2]) 112 { 113 return ParserError::ERROR_BOUNDARY_DATA; 114 } 115 index++; 116 break; 117 case State::HEADER_FIELD_START: 118 currentHeaderName.resize(0); 119 state = State::HEADER_FIELD; 120 headerFieldMark = i; 121 index = 0; 122 [[fallthrough]]; 123 case State::HEADER_FIELD: 124 if (c == cr) 125 { 126 headerFieldMark = 0; 127 state = State::HEADERS_ALMOST_DONE; 128 break; 129 } 130 131 index++; 132 if (c == hyphen) 133 { 134 break; 135 } 136 137 if (c == colon) 138 { 139 if (index == 1) 140 { 141 return ParserError::ERROR_EMPTY_HEADER; 142 } 143 144 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 145 currentHeaderName.append(buffer + headerFieldMark, 146 i - headerFieldMark); 147 state = State::HEADER_VALUE_START; 148 break; 149 } 150 cl = lower(c); 151 if (cl < 'a' || cl > 'z') 152 { 153 return ParserError::ERROR_HEADER_NAME; 154 } 155 break; 156 case State::HEADER_VALUE_START: 157 if (c == space) 158 { 159 break; 160 } 161 headerValueMark = i; 162 state = State::HEADER_VALUE; 163 [[fallthrough]]; 164 case State::HEADER_VALUE: 165 if (c == cr) 166 { 167 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 168 std::string_view value(buffer + headerValueMark, 169 i - headerValueMark); 170 mime_fields.rbegin()->fields.set(currentHeaderName, 171 value); 172 state = State::HEADER_VALUE_ALMOST_DONE; 173 } 174 break; 175 case State::HEADER_VALUE_ALMOST_DONE: 176 if (c != lf) 177 { 178 return ParserError::ERROR_HEADER_VALUE; 179 } 180 state = State::HEADER_FIELD_START; 181 break; 182 case State::HEADERS_ALMOST_DONE: 183 if (c != lf) 184 { 185 return ParserError::ERROR_HEADER_ENDING; 186 } 187 state = State::PART_DATA_START; 188 break; 189 case State::PART_DATA_START: 190 state = State::PART_DATA; 191 partDataMark = i; 192 [[fallthrough]]; 193 case State::PART_DATA: 194 if (index == 0) 195 { 196 skipNonBoundary(buffer, len, boundary.size() - 1, i); 197 198 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 199 c = buffer[i]; 200 } 201 processPartData(prevIndex, index, buffer, i, c, state); 202 break; 203 case State::END: 204 break; 205 } 206 } 207 return ParserError::PARSER_SUCCESS; 208 } 209 std::vector<FormPart> mime_fields; 210 std::string boundary; 211 212 private: 213 void indexBoundary() 214 { 215 std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0); 216 for (const char current : boundary) 217 { 218 boundaryIndex[static_cast<unsigned char>(current)] = true; 219 } 220 } 221 222 static char lower(char c) 223 { 224 return static_cast<char>(c | 0x20); 225 } 226 227 inline bool isBoundaryChar(char c) const 228 { 229 return boundaryIndex[static_cast<unsigned char>(c)]; 230 } 231 232 void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd, 233 size_t& i) 234 { 235 // boyer-moore derived algorithm to safely skip non-boundary data 236 while (i + boundary.size() <= len) 237 { 238 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 239 if (isBoundaryChar(buffer[i + boundaryEnd])) 240 { 241 break; 242 } 243 i += boundary.size(); 244 } 245 } 246 247 void processPartData(size_t& prevIndex, size_t& index, const char* buffer, 248 size_t& i, char c, State& state) 249 { 250 prevIndex = index; 251 252 if (index < boundary.size()) 253 { 254 if (boundary[index] == c) 255 { 256 if (index == 0) 257 { 258 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 259 const char* start = buffer + partDataMark; 260 size_t size = i - partDataMark; 261 mime_fields.rbegin()->content += 262 std::string_view(start, size); 263 } 264 index++; 265 } 266 else 267 { 268 index = 0; 269 } 270 } 271 else if (index == boundary.size()) 272 { 273 index++; 274 if (c == cr) 275 { 276 // cr = part boundary 277 flags = Boundary::PART_BOUNDARY; 278 } 279 else if (c == hyphen) 280 { 281 // hyphen = end boundary 282 flags = Boundary::END_BOUNDARY; 283 } 284 else 285 { 286 index = 0; 287 } 288 } 289 else 290 { 291 if (flags == Boundary::PART_BOUNDARY) 292 { 293 index = 0; 294 if (c == lf) 295 { 296 // unset the PART_BOUNDARY flag 297 flags = Boundary::NON_BOUNDARY; 298 mime_fields.push_back({}); 299 state = State::HEADER_FIELD_START; 300 return; 301 } 302 } 303 if (flags == Boundary::END_BOUNDARY) 304 { 305 if (c == hyphen) 306 { 307 state = State::END; 308 } 309 } 310 } 311 312 if (index > 0) 313 { 314 lookbehind[index - 1] = c; 315 } 316 else if (prevIndex > 0) 317 { 318 // if our boundary turned out to be rubbish, the captured 319 // lookbehind belongs to partData 320 321 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex); 322 prevIndex = 0; 323 partDataMark = i; 324 325 // reconsider the current character even so it interrupted 326 // the sequence it could be the beginning of a new sequence 327 i--; 328 } 329 } 330 331 std::string currentHeaderName; 332 std::string currentHeaderValue; 333 334 static constexpr char cr = '\r'; 335 static constexpr char lf = '\n'; 336 static constexpr char space = ' '; 337 static constexpr char hyphen = '-'; 338 static constexpr char colon = ':'; 339 340 std::array<bool, 256> boundaryIndex{}; 341 std::string lookbehind; 342 State state{State::START}; 343 Boundary flags{Boundary::NON_BOUNDARY}; 344 size_t index = 0; 345 size_t partDataMark = 0; 346 size_t headerFieldMark = 0; 347 size_t headerValueMark = 0; 348 }; 349