/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * RDMA Transport Layer
 *
 * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
 * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
 * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
 */

#ifndef RTRS_PRI_H
#define RTRS_PRI_H

#include <linux/uuid.h>
#include <rdma/rdma_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib.h>

#include "rtrs.h"

#define RTRS_PROTO_VER_MAJOR 2
#define RTRS_PROTO_VER_MINOR 0

#define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \
			      __stringify(RTRS_PROTO_VER_MINOR)

/*
 * The 32-bit RDMA immediate data is split into a 4-bit message type in the
 * high bits and a 28-bit payload in the low bits, see rtrs_to_imm() and
 * rtrs_from_imm() below.
 */
enum rtrs_imm_const {
	MAX_IMM_TYPE_BITS = 4,
	MAX_IMM_TYPE_MASK = ((1 << MAX_IMM_TYPE_BITS) - 1),
	MAX_IMM_PAYL_BITS = 28,
	MAX_IMM_PAYL_MASK = ((1 << MAX_IMM_PAYL_BITS) - 1),
};

/* Message type carried in the immediate data's type bits. */
enum rtrs_imm_type {
	RTRS_IO_REQ_IMM       = 0, /* client to server */
	RTRS_IO_RSP_IMM       = 1, /* server to client */
	RTRS_IO_RSP_W_INV_IMM = 2, /* server to client */

	RTRS_HB_MSG_IMM = 8, /* HB: HeartBeat */
	RTRS_HB_ACK_IMM = 9,

	RTRS_LAST_IMM,
};

enum {
	SERVICE_CON_QUEUE_DEPTH = 512,

	MAX_PATHS_NUM = 128,

	/*
	 * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
	 * and the minimum chunk size is 4096 (2^12).
	 * So the maximum sess_queue_depth is 65536 (2^16) in theory.
	 * But mempool_create, create_qp and ib_post_send fail with
	 * "cannot allocate memory" error if sess_queue_depth is too big.
	 * Therefore the practical max value of sess_queue_depth is
	 * somewhere between 1 and 65534 and it depends on the system.
	 */
	MAX_SESS_QUEUE_DEPTH = 65535,
	MIN_CHUNK_SIZE = 8192,

	RTRS_HB_INTERVAL_MS = 5000,
	RTRS_HB_MISSED_MAX = 5,

	RTRS_MAGIC = 0x1BBD,
	RTRS_PROTO_VER = (RTRS_PROTO_VER_MAJOR << 8) | RTRS_PROTO_VER_MINOR,
};

struct rtrs_ib_dev;

/*
 * Per-role (client/server) hooks invoked when a device is added to or
 * removed from a struct rtrs_rdma_dev_pd pool.
 */
struct rtrs_rdma_dev_pd_ops {
	struct rtrs_ib_dev *(*alloc)(void);
	void (*free)(struct rtrs_ib_dev *dev);
	int (*init)(struct rtrs_ib_dev *dev);
	void (*deinit)(struct rtrs_ib_dev *dev);
};

/*
 * Pool of rtrs_ib_dev entries, one per underlying ib_device; protected
 * by @mutex.  See rtrs_rdma_dev_pd_init()/rtrs_ib_dev_find_or_add().
 */
struct rtrs_rdma_dev_pd {
	struct mutex		mutex;
	struct list_head	list;
	enum ib_pd_flags	pd_flags;
	const struct rtrs_rdma_dev_pd_ops *ops;
};

/*
 * Refcounted pairing of an ib_device with its protection domain, linked
 * into the owning rtrs_rdma_dev_pd via @entry.
 */
struct rtrs_ib_dev {
	struct ib_device	*ib_dev;
	struct ib_pd		*ib_pd;
	struct kref		ref;
	struct list_head	entry;
	struct rtrs_rdma_dev_pd	*pool;
};

/* One RDMA connection (QP/CQ pair) of a session; @cid is its index. */
struct rtrs_con {
	struct rtrs_sess	*sess;
	struct ib_qp		*qp;
	struct ib_cq		*cq;
	struct rdma_cm_id	*cm_id;
	unsigned int		cid;
	int                     nr_cqe;
};

/*
 * Common (client/server shared) per-session state: addressing, the array
 * of @con_num connections, device reference and heartbeat machinery.
 */
struct rtrs_sess {
	struct list_head	entry;
	struct sockaddr_storage dst_addr;
	struct sockaddr_storage src_addr;
	char			sessname[NAME_MAX];
	uuid_t			uuid;
	struct rtrs_con	**con;
	unsigned int		con_num;
	/* NOTE(review): presumably the count of IRQ-polled CQs vs softirq —
	 * confirm against rtrs_cq_qp_create() callers. */
	unsigned int		irq_con_num;
	unsigned int		recon_cnt;
	struct rtrs_ib_dev	*dev;
	int			dev_ref;
	/* Heartbeat: periodic RTRS_HB_MSG_IMM sends via @hb_wq/@hb_dwork;
	 * @hb_err_handler fires after @hb_missed_max unanswered beats. */
	struct ib_cqe		*hb_cqe;
	void			(*hb_err_handler)(struct rtrs_con *con);
	struct workqueue_struct *hb_wq;
	struct delayed_work	hb_dwork;
	unsigned int		hb_interval_ms;
	unsigned int		hb_missed_cnt;
	unsigned int		hb_missed_max;
	ktime_t			hb_last_sent;
	ktime_t			hb_cur_latency;
};

/* rtrs information unit: a DMA-mapped buffer plus its completion entry */
struct rtrs_iu {
	struct ib_cqe		cqe;
	dma_addr_t		dma_addr;
	void			*buf;
	size_t			size;
	enum dma_data_direction direction;
};

/**
 * enum rtrs_msg_types - RTRS message types, see also rtrs/README
 * @RTRS_MSG_INFO_REQ:		Client additional info request to the server
 * @RTRS_MSG_INFO_RSP:		Server additional info response to the client
 * @RTRS_MSG_WRITE:		Client writes data per RDMA to server
 * @RTRS_MSG_READ:		Client requests data transfer from server
 * @RTRS_MSG_RKEY_RSP:		Server refreshed rkey for rbuf
 */
enum rtrs_msg_types {
	RTRS_MSG_INFO_REQ,
	RTRS_MSG_INFO_RSP,
	RTRS_MSG_WRITE,
	RTRS_MSG_READ,
	RTRS_MSG_RKEY_RSP,
};

/**
 * enum rtrs_msg_flags - RTRS message flags.
 * @RTRS_MSG_NEED_INVAL_F:	Send invalidation in response.
 * @RTRS_MSG_NEW_RKEY_F:	Send refreshed rkey in response.
 */
enum rtrs_msg_flags {
	RTRS_MSG_NEED_INVAL_F	= 1 << 0,
	RTRS_MSG_NEW_RKEY_F	= 1 << 1,
};

/**
 * struct rtrs_sg_desc - RDMA-Buffer entry description
 * @addr:	Address of RDMA destination buffer
 * @key:	Authorization rkey to write to the buffer
 * @len:	Size of the buffer
 */
struct rtrs_sg_desc {
	__le64			addr;
	__le32			key;
	__le32			len;
};

/**
 * struct rtrs_msg_conn_req - Client connection request to the server
 * @magic:	   RTRS magic
 * @version:	   RTRS protocol version
 * @cid:	   Current connection id
 * @cid_num:	   Number of connections per session
 * @recon_cnt:	   Reconnections counter
 * @sess_uuid:	   UUID of a session (path)
 * @paths_uuid:	   UUID of a group of sessions (paths)
 * @first_conn:    Set if this is the first connection of the session
 *
 * NOTE: max size 56 bytes, see man rdma_connect().
 */
struct rtrs_msg_conn_req {
	/* Is set to 0 by cma.c in case of AF_IB, do not touch that.
	 * see https://www.spinics.net/lists/linux-rdma/msg22397.html
	 */
	u8		__cma_version;
	/* On sender side that should be set to 0, or cma_save_ip_info()
	 * extract garbage and will fail.
	 */
	u8		__ip_version;
	__le16		magic;
	__le16		version;
	__le16		cid;
	__le16		cid_num;
	__le16		recon_cnt;
	uuid_t		sess_uuid;
	uuid_t		paths_uuid;
	u8		first_conn : 1;
	u8		reserved_bits : 7;
	u8		reserved[11];
};

/**
 * struct rtrs_msg_conn_rsp - Server connection response to the client
 * @magic:	   RTRS magic
 * @version:	   RTRS protocol version
 * @errno:	   If rdma_accept() then 0, if rdma_reject() indicates error
 * @queue_depth:   max inflight messages (queue-depth) in this session
 * @max_io_size:   max io size server supports
 * @max_hdr_size:  max msg header size server supports
 * @flags:	   see enum rtrs_msg_flags
 *
 * NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept().
 */
struct rtrs_msg_conn_rsp {
	__le16		magic;
	__le16		version;
	__le16		errno;
	__le16		queue_depth;
	__le32		max_io_size;
	__le32		max_hdr_size;
	__le32		flags;
	u8		reserved[36];
};

/**
 * struct rtrs_msg_info_req
 * @type:		@RTRS_MSG_INFO_REQ
 * @sessname:		Session name chosen by client
 */
struct rtrs_msg_info_req {
	__le16		type;
	u8		sessname[NAME_MAX];
	u8		reserved[15];
};

/**
 * struct rtrs_msg_info_rsp
 * @type:		@RTRS_MSG_INFO_RSP
 * @sg_cnt:		Number of @desc entries
 * @desc:		RDMA buffers where the client can write to server
 */
struct rtrs_msg_info_rsp {
	__le16		type;
	__le16          sg_cnt;
	u8              reserved[4];
	struct rtrs_sg_desc desc[];
};

/**
 * struct rtrs_msg_rkey_rsp
 * @type:		@RTRS_MSG_RKEY_RSP
 * @buf_id:		RDMA buf_id of the new rkey
 * @rkey:		new remote key for RDMA buffers id from server
 */
struct rtrs_msg_rkey_rsp {
	__le16		type;
	__le16          buf_id;
	__le32		rkey;
};

/**
 * struct rtrs_msg_rdma_read - RDMA data transfer request from client
 * @type:		always @RTRS_MSG_READ
 * @usr_len:		length of user payload
 * @flags:		see enum rtrs_msg_flags
 * @sg_cnt:		number of @desc entries
 * @desc:		RDMA buffers where the server can write the result to
 */
struct rtrs_msg_rdma_read {
	__le16			type;
	__le16			usr_len;
	__le16			flags;
	__le16			sg_cnt;
	struct rtrs_sg_desc	desc[];
};

/**
 * struct rtrs_msg_rdma_write - Message transferred to server with RDMA-Write
 * @type:		always @RTRS_MSG_WRITE
 * @usr_len:		length of user payload
 */
struct rtrs_msg_rdma_write {
	__le16			type;
	__le16			usr_len;
};

/**
 * struct rtrs_msg_rdma_hdr - header for read or write request
 * @type:		@RTRS_MSG_WRITE | @RTRS_MSG_READ
 */
struct rtrs_msg_rdma_hdr {
	__le16			type;
};

/* rtrs.c */

struct rtrs_iu *rtrs_iu_alloc(u32 queue_num, size_t size, gfp_t t,
			      struct ib_device *dev, enum dma_data_direction,
			      void (*done)(struct ib_cq *cq, struct ib_wc *wc));
void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_num);
int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu);
int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
		      struct ib_send_wr *head);
int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
				struct ib_sge *sge, unsigned int num_sge,
				u32 rkey, u64 rdma_addr, u32 imm_data,
				enum ib_send_flags flags,
				struct ib_send_wr *head,
				struct ib_send_wr *tail);

int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe);
int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe,
				   u32 imm_data, enum ib_send_flags flags,
				   struct ib_send_wr *head);

int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con,
		      u32 max_send_sge, int cq_vector, int nr_cqe,
		      u32 max_send_wr, u32 max_recv_wr,
		      enum ib_poll_context poll_ctx);
void rtrs_cq_qp_destroy(struct rtrs_con *con);

void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe,
		  unsigned int interval_ms, unsigned int missed_max,
		  void (*err_handler)(struct rtrs_con *con),
		  struct workqueue_struct *wq);
void rtrs_start_hb(struct rtrs_sess *sess);
void rtrs_stop_hb(struct rtrs_sess *sess);
void rtrs_send_hb_ack(struct rtrs_sess *sess);

void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
			   struct rtrs_rdma_dev_pd *pool);
void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool);

struct rtrs_ib_dev *rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
					    struct rtrs_rdma_dev_pd *pool);
int rtrs_ib_dev_put(struct rtrs_ib_dev *dev);

/* Pack a message type and payload into 32-bit RDMA immediate data. */
static inline u32 rtrs_to_imm(u32 type, u32 payload)
{
	BUILD_BUG_ON(MAX_IMM_PAYL_BITS + MAX_IMM_TYPE_BITS != 32);
	BUILD_BUG_ON(RTRS_LAST_IMM > (1<<MAX_IMM_TYPE_BITS));
	return ((type & MAX_IMM_TYPE_MASK) << MAX_IMM_PAYL_BITS) |
		(payload & MAX_IMM_PAYL_MASK);
}

/* Inverse of rtrs_to_imm(): split immediate data into type and payload. */
static inline void rtrs_from_imm(u32 imm, u32 *type, u32 *payload)
{
	*payload = imm & MAX_IMM_PAYL_MASK;
	*type = imm >> MAX_IMM_PAYL_BITS;
}

/* Build the immediate data for an I/O request carrying chunk address @addr. */
static inline u32 rtrs_to_io_req_imm(u32 addr)
{
	return rtrs_to_imm(RTRS_IO_REQ_IMM, addr);
}

/*
 * Build the immediate data for an I/O response: the 28-bit payload packs
 * abs(@errno) into the top 9 bits and @msg_id into the low 19 bits.
 * @w_inval selects the "response with invalidate" type.
 */
static inline u32 rtrs_to_io_rsp_imm(u32 msg_id, int errno, bool w_inval)
{
	enum rtrs_imm_type type;
	u32 payload;

	/* 9 bits for errno, 19 bits for msg_id */
	payload = (abs(errno) & 0x1ff) << 19 | (msg_id & 0x7ffff);
	type = w_inval ? RTRS_IO_RSP_W_INV_IMM : RTRS_IO_RSP_IMM;

	return rtrs_to_imm(type, payload);
}

/* Inverse of rtrs_to_io_rsp_imm(); @errno comes back negated (<= 0). */
static inline void rtrs_from_io_rsp_imm(u32 payload, u32 *msg_id, int *errno)
{
	/* 9 bits for errno, 19 bits for msg_id */
	*msg_id = payload & 0x7ffff;
	*errno = -(int)((payload >> 19) & 0x1ff);
}

/*
 * Generate a sysfs store handler that resets a stats object: writing "1"
 * calls @reset with true (clear), "0" with false; anything else is -EINVAL.
 */
#define STAT_STORE_FUNC(type, set_value, reset)				\
static ssize_t set_value##_store(struct kobject *kobj,			\
				 struct kobj_attribute *attr,		\
				 const char *buf, size_t count)		\
{									\
	int ret = -EINVAL;						\
	type *stats = container_of(kobj, type, kobj_stats);		\
									\
	if (sysfs_streq(buf, "1"))					\
		ret = reset(stats, true);			\
	else if (sysfs_streq(buf, "0"))					\
		ret = reset(stats, false);			\
	if (ret)							\
		return ret;						\
									\
	return count;							\
}

/* Generate a sysfs show handler that prints a stats object via @print. */
#define STAT_SHOW_FUNC(type, get_value, print)				\
static ssize_t get_value##_show(struct kobject *kobj,			\
				struct kobj_attribute *attr,		\
				char *page)				\
{									\
	type *stats = container_of(kobj, type, kobj_stats);		\
									\
	return print(stats, page, PAGE_SIZE);				\
}

/* Generate both handlers plus the read-write kobj_attribute for @stat. */
#define STAT_ATTR(type, stat, print, reset)				\
STAT_STORE_FUNC(type, stat, reset)					\
STAT_SHOW_FUNC(type, stat, print)					\
static struct kobj_attribute stat##_attr = __ATTR_RW(stat)

#endif /* RTRS_PRI_H */