1 // SPDX-License-Identifier: Apache-2.0 2 // SPDX-FileCopyrightText: Copyright OpenBMC Authors 3 #pragma once 4 5 #include "http_request.hpp" 6 7 #include <boost/beast/http/fields.hpp> 8 9 #include <ranges> 10 #include <string> 11 #include <string_view> 12 13 enum class ParserError 14 { 15 PARSER_SUCCESS, 16 ERROR_BOUNDARY_FORMAT, 17 ERROR_BOUNDARY_CR, 18 ERROR_BOUNDARY_LF, 19 ERROR_BOUNDARY_DATA, 20 ERROR_EMPTY_HEADER, 21 ERROR_HEADER_NAME, 22 ERROR_HEADER_VALUE, 23 ERROR_HEADER_ENDING, 24 ERROR_UNEXPECTED_END_OF_HEADER, 25 ERROR_UNEXPECTED_END_OF_INPUT, 26 ERROR_OUT_OF_RANGE 27 }; 28 29 enum class State 30 { 31 START, 32 START_BOUNDARY, 33 HEADER_FIELD_START, 34 HEADER_FIELD, 35 HEADER_VALUE_START, 36 HEADER_VALUE, 37 HEADER_VALUE_ALMOST_DONE, 38 HEADERS_ALMOST_DONE, 39 PART_DATA_START, 40 PART_DATA, 41 END 42 }; 43 44 enum class Boundary 45 { 46 NON_BOUNDARY, 47 PART_BOUNDARY, 48 END_BOUNDARY, 49 }; 50 51 struct FormPart 52 { 53 boost::beast::http::fields fields; 54 std::string content; 55 }; 56 57 class MultipartParser 58 { 59 public: 60 MultipartParser() = default; 61 parse(const crow::Request & req)62 [[nodiscard]] ParserError parse(const crow::Request& req) 63 { 64 std::string_view contentType = req.getHeaderValue("content-type"); 65 66 const std::string boundaryFormat = "multipart/form-data; boundary="; 67 if (!contentType.starts_with(boundaryFormat)) 68 { 69 return ParserError::ERROR_BOUNDARY_FORMAT; 70 } 71 72 std::string_view ctBoundary = contentType.substr(boundaryFormat.size()); 73 74 boundary = "\r\n--"; 75 boundary += ctBoundary; 76 indexBoundary(); 77 lookbehind.resize(boundary.size() + 8); 78 state = State::START; 79 80 const std::string& buffer = req.body(); 81 size_t len = buffer.size(); 82 char cl = 0; 83 84 for (size_t i = 0; i < len; i++) 85 { 86 char c = buffer[i]; 87 switch (state) 88 { 89 case State::START: 90 index = 0; 91 state = State::START_BOUNDARY; 92 [[fallthrough]]; 93 case State::START_BOUNDARY: 94 if (index == boundary.size() - 2) 95 { 96 if (c != cr) 97 { 98 return ParserError::ERROR_BOUNDARY_CR; 99 } 100 index++; 101 break; 102 } 103 else if (index - 1 == boundary.size() - 2) 104 { 105 if (c != lf) 106 { 107 return ParserError::ERROR_BOUNDARY_LF; 108 } 109 index = 0; 110 mime_fields.emplace_back(); 111 state = State::HEADER_FIELD_START; 112 break; 113 } 114 if (c != boundary[index + 2]) 115 { 116 return ParserError::ERROR_BOUNDARY_DATA; 117 } 118 index++; 119 break; 120 case State::HEADER_FIELD_START: 121 currentHeaderName.resize(0); 122 state = State::HEADER_FIELD; 123 headerFieldMark = i; 124 index = 0; 125 [[fallthrough]]; 126 case State::HEADER_FIELD: 127 if (c == cr) 128 { 129 headerFieldMark = 0; 130 state = State::HEADERS_ALMOST_DONE; 131 break; 132 } 133 134 index++; 135 if (c == hyphen) 136 { 137 break; 138 } 139 140 if (c == colon) 141 { 142 if (index == 1) 143 { 144 return ParserError::ERROR_EMPTY_HEADER; 145 } 146 147 currentHeaderName.append(&buffer[headerFieldMark], 148 i - headerFieldMark); 149 state = State::HEADER_VALUE_START; 150 break; 151 } 152 cl = lower(c); 153 if (cl < 'a' || cl > 'z') 154 { 155 return ParserError::ERROR_HEADER_NAME; 156 } 157 break; 158 case State::HEADER_VALUE_START: 159 if (c == space) 160 { 161 break; 162 } 163 headerValueMark = i; 164 state = State::HEADER_VALUE; 165 [[fallthrough]]; 166 case State::HEADER_VALUE: 167 if (c == cr) 168 { 169 std::string_view value(&buffer[headerValueMark], 170 i - headerValueMark); 171 mime_fields.rbegin()->fields.set(currentHeaderName, 172 value); 173 state = State::HEADER_VALUE_ALMOST_DONE; 174 } 175 break; 176 case State::HEADER_VALUE_ALMOST_DONE: 177 if (c != lf) 178 { 179 return ParserError::ERROR_HEADER_VALUE; 180 } 181 state = State::HEADER_FIELD_START; 182 break; 183 case State::HEADERS_ALMOST_DONE: 184 if (c != lf) 185 { 186 return ParserError::ERROR_HEADER_ENDING; 187 } 188 if (index > 0) 189 { 190 return ParserError::ERROR_UNEXPECTED_END_OF_HEADER; 191 } 192 state = State::PART_DATA_START; 193 break; 194 case State::PART_DATA_START: 195 state = State::PART_DATA; 196 partDataMark = i; 197 [[fallthrough]]; 198 case State::PART_DATA: 199 { 200 if (index == 0) 201 { 202 skipNonBoundary(buffer, boundary.size() - 1, i); 203 c = buffer[i]; 204 } 205 if (auto ec = processPartData(buffer, i, c); 206 ec != ParserError::PARSER_SUCCESS) 207 { 208 return ec; 209 } 210 break; 211 } 212 case State::END: 213 break; 214 default: 215 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT; 216 } 217 } 218 219 if (state != State::END) 220 { 221 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT; 222 } 223 224 return ParserError::PARSER_SUCCESS; 225 } 226 std::vector<FormPart> mime_fields; 227 std::string boundary; 228 229 private: indexBoundary()230 void indexBoundary() 231 { 232 std::ranges::fill(boundaryIndex, 0); 233 for (const char current : boundary) 234 { 235 boundaryIndex[static_cast<unsigned char>(current)] = true; 236 } 237 } 238 lower(char c)239 static char lower(char c) 240 { 241 return static_cast<char>(c | 0x20); 242 } 243 isBoundaryChar(char c) const244 bool isBoundaryChar(char c) const 245 { 246 return boundaryIndex[static_cast<unsigned char>(c)]; 247 } 248 skipNonBoundary(const std::string & buffer,size_t boundaryEnd,size_t & i)249 void skipNonBoundary(const std::string& buffer, size_t boundaryEnd, 250 size_t& i) 251 { 252 // boyer-moore derived algorithm to safely skip non-boundary data 253 while (i + boundary.size() <= buffer.length()) 254 { 255 if (isBoundaryChar(buffer[i + boundaryEnd])) 256 { 257 break; 258 } 259 i += boundary.size(); 260 } 261 } 262 processPartData(const std::string & buffer,size_t & i,char c)263 ParserError processPartData(const std::string& buffer, size_t& i, char c) 264 { 265 size_t prevIndex = index; 266 267 if (index < boundary.size()) 268 { 269 if (boundary[index] == c) 270 { 271 if (index == 0) 272 { 273 const char* start = &buffer[partDataMark]; 274 size_t size = i - partDataMark; 275 mime_fields.rbegin()->content += 276 std::string_view(start, size); 277 } 278 index++; 279 } 280 else 281 { 282 index = 0; 283 } 284 } 285 else if (index == boundary.size()) 286 { 287 index++; 288 if (c == cr) 289 { 290 // cr = part boundary 291 flags = Boundary::PART_BOUNDARY; 292 } 293 else if (c == hyphen) 294 { 295 // hyphen = end boundary 296 flags = Boundary::END_BOUNDARY; 297 } 298 else 299 { 300 index = 0; 301 } 302 } 303 else 304 { 305 if (flags == Boundary::PART_BOUNDARY) 306 { 307 index = 0; 308 if (c == lf) 309 { 310 // unset the PART_BOUNDARY flag 311 flags = Boundary::NON_BOUNDARY; 312 mime_fields.emplace_back(); 313 state = State::HEADER_FIELD_START; 314 return ParserError::PARSER_SUCCESS; 315 } 316 } 317 if (flags == Boundary::END_BOUNDARY) 318 { 319 if (c == hyphen) 320 { 321 state = State::END; 322 } 323 else 324 { 325 flags = Boundary::NON_BOUNDARY; 326 index = 0; 327 } 328 } 329 } 330 331 if (index > 0) 332 { 333 if ((index - 1) >= lookbehind.size()) 334 { 335 // Should never happen, but when it does it won't cause crash 336 return ParserError::ERROR_OUT_OF_RANGE; 337 } 338 lookbehind[index - 1] = c; 339 } 340 else if (prevIndex > 0) 341 { 342 // if our boundary turned out to be rubbish, the captured 343 // lookbehind belongs to partData 344 345 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex); 346 partDataMark = i; 347 348 // reconsider the current character even so it interrupted 349 // the sequence it could be the beginning of a new sequence 350 i--; 351 } 352 return ParserError::PARSER_SUCCESS; 353 } 354 355 std::string currentHeaderName; 356 std::string currentHeaderValue; 357 358 static constexpr char cr = '\r'; 359 static constexpr char lf = '\n'; 360 static constexpr char space = ' '; 361 static constexpr char hyphen = '-'; 362 static constexpr char colon = ':'; 363 364 std::array<bool, 256> boundaryIndex{}; 365 std::string lookbehind; 366 State state{State::START}; 367 Boundary flags{Boundary::NON_BOUNDARY}; 368 size_t index = 0; 369 size_t partDataMark = 0; 370 size_t headerFieldMark = 0; 371 size_t headerValueMark = 0; 372 }; 373