160c778b2SVlad Yasevich /* SCTP kernel implementation 21da177e4SLinus Torvalds * (C) Copyright IBM Corp. 2003, 2004 31da177e4SLinus Torvalds * 460c778b2SVlad Yasevich * This file is part of the SCTP kernel implementation 51da177e4SLinus Torvalds * 659c51591SMichael Opdenacker * This file contains the code relating the chunk abstraction. 71da177e4SLinus Torvalds * 860c778b2SVlad Yasevich * This SCTP implementation is free software; 91da177e4SLinus Torvalds * you can redistribute it and/or modify it under the terms of 101da177e4SLinus Torvalds * the GNU General Public License as published by 111da177e4SLinus Torvalds * the Free Software Foundation; either version 2, or (at your option) 121da177e4SLinus Torvalds * any later version. 131da177e4SLinus Torvalds * 1460c778b2SVlad Yasevich * This SCTP implementation is distributed in the hope that it 151da177e4SLinus Torvalds * will be useful, but WITHOUT ANY WARRANTY; without even the implied 161da177e4SLinus Torvalds * ************************ 171da177e4SLinus Torvalds * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 181da177e4SLinus Torvalds * See the GNU General Public License for more details. 191da177e4SLinus Torvalds * 201da177e4SLinus Torvalds * You should have received a copy of the GNU General Public License 214b2f13a2SJeff Kirsher * along with GNU CC; see the file COPYING. If not, see 224b2f13a2SJeff Kirsher * <http://www.gnu.org/licenses/>. 231da177e4SLinus Torvalds * 241da177e4SLinus Torvalds * Please send any bug reports or fixes you make to the 251da177e4SLinus Torvalds * email address(es): 2691705c61SDaniel Borkmann * lksctp developers <linux-sctp@vger.kernel.org> 271da177e4SLinus Torvalds * 281da177e4SLinus Torvalds * Written or modified by: 291da177e4SLinus Torvalds * Jon Grimm <jgrimm@us.ibm.com> 301da177e4SLinus Torvalds * Sridhar Samudrala <sri@us.ibm.com> 311da177e4SLinus Torvalds */ 321da177e4SLinus Torvalds 33145ce502SJoe Perches #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 34145ce502SJoe Perches 351da177e4SLinus Torvalds #include <linux/types.h> 361da177e4SLinus Torvalds #include <linux/kernel.h> 371da177e4SLinus Torvalds #include <linux/net.h> 381da177e4SLinus Torvalds #include <linux/inet.h> 391da177e4SLinus Torvalds #include <linux/skbuff.h> 405a0e3ad6STejun Heo #include <linux/slab.h> 411da177e4SLinus Torvalds #include <net/sock.h> 421da177e4SLinus Torvalds #include <net/sctp/sctp.h> 431da177e4SLinus Torvalds #include <net/sctp/sm.h> 441da177e4SLinus Torvalds 451da177e4SLinus Torvalds /* This file is mostly in anticipation of future work, but initially 461da177e4SLinus Torvalds * populate with fragment tracking for an outbound message. 471da177e4SLinus Torvalds */ 481da177e4SLinus Torvalds 491da177e4SLinus Torvalds /* Initialize datamsg from memory. */ 501da177e4SLinus Torvalds static void sctp_datamsg_init(struct sctp_datamsg *msg) 511da177e4SLinus Torvalds { 52c0acdfb4SReshetova, Elena refcount_set(&msg->refcnt, 1); 531da177e4SLinus Torvalds msg->send_failed = 0; 541da177e4SLinus Torvalds msg->send_error = 0; 550e3aef8dSVlad Yasevich msg->can_delay = 1; 561da177e4SLinus Torvalds msg->expires_at = 0; 571da177e4SLinus Torvalds INIT_LIST_HEAD(&msg->chunks); 581da177e4SLinus Torvalds } 591da177e4SLinus Torvalds 601da177e4SLinus Torvalds /* Allocate and initialize datamsg. */ 61dda91928SDaniel Borkmann static struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) 621da177e4SLinus Torvalds { 631da177e4SLinus Torvalds struct sctp_datamsg *msg; 641da177e4SLinus Torvalds msg = kmalloc(sizeof(struct sctp_datamsg), gfp); 65e8c38751SLi Zefan if (msg) { 661da177e4SLinus Torvalds sctp_datamsg_init(msg); 671da177e4SLinus Torvalds SCTP_DBG_OBJCNT_INC(datamsg); 68e8c38751SLi Zefan } 691da177e4SLinus Torvalds return msg; 701da177e4SLinus Torvalds } 711da177e4SLinus Torvalds 72b61c654fSXin Long void sctp_datamsg_free(struct sctp_datamsg *msg) 73b61c654fSXin Long { 74b61c654fSXin Long struct sctp_chunk *chunk; 75b61c654fSXin Long 76b61c654fSXin Long /* This doesn't have to be a _safe vairant because 77b61c654fSXin Long * sctp_chunk_free() only drops the refs. 78b61c654fSXin Long */ 79b61c654fSXin Long list_for_each_entry(chunk, &msg->chunks, frag_list) 80b61c654fSXin Long sctp_chunk_free(chunk); 81b61c654fSXin Long 82b61c654fSXin Long sctp_datamsg_put(msg); 83b61c654fSXin Long } 84b61c654fSXin Long 851da177e4SLinus Torvalds /* Final destructruction of datamsg memory. */ 861da177e4SLinus Torvalds static void sctp_datamsg_destroy(struct sctp_datamsg *msg) 871da177e4SLinus Torvalds { 881da177e4SLinus Torvalds struct list_head *pos, *temp; 891da177e4SLinus Torvalds struct sctp_chunk *chunk; 901da177e4SLinus Torvalds struct sctp_sock *sp; 911da177e4SLinus Torvalds struct sctp_ulpevent *ev; 921da177e4SLinus Torvalds struct sctp_association *asoc = NULL; 931da177e4SLinus Torvalds int error = 0, notify; 941da177e4SLinus Torvalds 951da177e4SLinus Torvalds /* If we failed, we may need to notify. */ 961da177e4SLinus Torvalds notify = msg->send_failed ? -1 : 0; 971da177e4SLinus Torvalds 981da177e4SLinus Torvalds /* Release all references. */ 991da177e4SLinus Torvalds list_for_each_safe(pos, temp, &msg->chunks) { 1001da177e4SLinus Torvalds list_del_init(pos); 1011da177e4SLinus Torvalds chunk = list_entry(pos, struct sctp_chunk, frag_list); 1021da177e4SLinus Torvalds /* Check whether we _really_ need to notify. */ 1031da177e4SLinus Torvalds if (notify < 0) { 1041da177e4SLinus Torvalds asoc = chunk->asoc; 1051da177e4SLinus Torvalds if (msg->send_error) 1061da177e4SLinus Torvalds error = msg->send_error; 1071da177e4SLinus Torvalds else 1081da177e4SLinus Torvalds error = asoc->outqueue.error; 1091da177e4SLinus Torvalds 1101da177e4SLinus Torvalds sp = sctp_sk(asoc->base.sk); 1111da177e4SLinus Torvalds notify = sctp_ulpevent_type_enabled(SCTP_SEND_FAILED, 1121da177e4SLinus Torvalds &sp->subscribe); 1131da177e4SLinus Torvalds } 1141da177e4SLinus Torvalds 1151da177e4SLinus Torvalds /* Generate a SEND FAILED event only if enabled. */ 1161da177e4SLinus Torvalds if (notify > 0) { 1171da177e4SLinus Torvalds int sent; 1181da177e4SLinus Torvalds if (chunk->has_tsn) 1191da177e4SLinus Torvalds sent = SCTP_DATA_SENT; 1201da177e4SLinus Torvalds else 1211da177e4SLinus Torvalds sent = SCTP_DATA_UNSENT; 1221da177e4SLinus Torvalds 1231da177e4SLinus Torvalds ev = sctp_ulpevent_make_send_failed(asoc, chunk, sent, 1241da177e4SLinus Torvalds error, GFP_ATOMIC); 1251da177e4SLinus Torvalds if (ev) 1261da177e4SLinus Torvalds sctp_ulpq_tail_event(&asoc->ulpq, ev); 1271da177e4SLinus Torvalds } 1281da177e4SLinus Torvalds 1291da177e4SLinus Torvalds sctp_chunk_put(chunk); 1301da177e4SLinus Torvalds } 1311da177e4SLinus Torvalds 1321da177e4SLinus Torvalds SCTP_DBG_OBJCNT_DEC(datamsg); 1331da177e4SLinus Torvalds kfree(msg); 1341da177e4SLinus Torvalds } 1351da177e4SLinus Torvalds 1361da177e4SLinus Torvalds /* Hold a reference. */ 1371da177e4SLinus Torvalds static void sctp_datamsg_hold(struct sctp_datamsg *msg) 1381da177e4SLinus Torvalds { 139c0acdfb4SReshetova, Elena refcount_inc(&msg->refcnt); 1401da177e4SLinus Torvalds } 1411da177e4SLinus Torvalds 1421da177e4SLinus Torvalds /* Release a reference. */ 1431da177e4SLinus Torvalds void sctp_datamsg_put(struct sctp_datamsg *msg) 1441da177e4SLinus Torvalds { 145c0acdfb4SReshetova, Elena if (refcount_dec_and_test(&msg->refcnt)) 1461da177e4SLinus Torvalds sctp_datamsg_destroy(msg); 1471da177e4SLinus Torvalds } 1481da177e4SLinus Torvalds 1491da177e4SLinus Torvalds /* Assign a chunk to this datamsg. */ 1501da177e4SLinus Torvalds static void sctp_datamsg_assign(struct sctp_datamsg *msg, struct sctp_chunk *chunk) 1511da177e4SLinus Torvalds { 1521da177e4SLinus Torvalds sctp_datamsg_hold(msg); 1531da177e4SLinus Torvalds chunk->msg = msg; 1541da177e4SLinus Torvalds } 1551da177e4SLinus Torvalds 1561da177e4SLinus Torvalds 1571da177e4SLinus Torvalds /* A data chunk can have a maximum payload of (2^16 - 20). Break 1581da177e4SLinus Torvalds * down any such message into smaller chunks. Opportunistically, fragment 1591da177e4SLinus Torvalds * the chunks down to the current MTU constraints. We may get refragmented 1601da177e4SLinus Torvalds * later if the PMTU changes, but it is _much better_ to fragment immediately 1611da177e4SLinus Torvalds * with a reasonable guess than always doing our fragmentation on the 1621da177e4SLinus Torvalds * soft-interrupt. 1631da177e4SLinus Torvalds */ 1641da177e4SLinus Torvalds struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, 1651da177e4SLinus Torvalds struct sctp_sndrcvinfo *sinfo, 166e0eb093eSAl Viro struct iov_iter *from) 1671da177e4SLinus Torvalds { 168bfd2e4b8SMarcelo Ricardo Leitner size_t len, first_len, max_data, remaining; 169bfd2e4b8SMarcelo Ricardo Leitner size_t msg_len = iov_iter_count(from); 170bfd2e4b8SMarcelo Ricardo Leitner struct list_head *pos, *temp; 1711da177e4SLinus Torvalds struct sctp_chunk *chunk; 1721da177e4SLinus Torvalds struct sctp_datamsg *msg; 173bfd2e4b8SMarcelo Ricardo Leitner int err; 1741da177e4SLinus Torvalds 1751da177e4SLinus Torvalds msg = sctp_datamsg_new(GFP_KERNEL); 1761da177e4SLinus Torvalds if (!msg) 1776e51fe75STommi Rantala return ERR_PTR(-ENOMEM); 1781da177e4SLinus Torvalds 1791da177e4SLinus Torvalds /* Note: Calculate this outside of the loop, so that all fragments 1801da177e4SLinus Torvalds * have the same expiration. 1811da177e4SLinus Torvalds */ 1828ae808ebSXin Long if (asoc->peer.prsctp_capable && sinfo->sinfo_timetolive && 1838ae808ebSXin Long (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags) || 1848ae808ebSXin Long !SCTP_PR_POLICY(sinfo->sinfo_flags))) 1851da177e4SLinus Torvalds msg->expires_at = jiffies + 1861da177e4SLinus Torvalds msecs_to_jiffies(sinfo->sinfo_timetolive); 1870605483fSXin Long 1883e62abf9SVlad Yasevich /* This is the biggest possible DATA chunk that can fit into 1893e62abf9SVlad Yasevich * the packet 1903e62abf9SVlad Yasevich */ 1914a225ce3SMarcelo Ricardo Leitner max_data = asoc->pathmtu - 1923e62abf9SVlad Yasevich sctp_sk(asoc->base.sk)->pf->af->net_header_len - 1934a225ce3SMarcelo Ricardo Leitner sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk); 1944a225ce3SMarcelo Ricardo Leitner max_data = SCTP_TRUNC4(max_data); 1951da177e4SLinus Torvalds 1964cd57c80SVlad Yasevich /* If the the peer requested that we authenticate DATA chunks 1972bccbadfSwangweidong * we need to account for bundling of the AUTH chunks along with 1984cd57c80SVlad Yasevich * DATA. 1994cd57c80SVlad Yasevich */ 2004cd57c80SVlad Yasevich if (sctp_auth_send_cid(SCTP_CID_DATA, asoc)) { 2014cd57c80SVlad Yasevich struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc); 2024cd57c80SVlad Yasevich 2034cd57c80SVlad Yasevich if (hmac_desc) 204e2f036a9SMarcelo Ricardo Leitner max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) + 2054cd57c80SVlad Yasevich hmac_desc->hmac_len); 2064cd57c80SVlad Yasevich } 2074cd57c80SVlad Yasevich 208bfd2e4b8SMarcelo Ricardo Leitner /* Check what's our max considering the above */ 209bfd2e4b8SMarcelo Ricardo Leitner max_data = min_t(size_t, max_data, asoc->frag_point); 2103e62abf9SVlad Yasevich 211bfd2e4b8SMarcelo Ricardo Leitner /* Set first_len and then account for possible bundles on first frag */ 212bfd2e4b8SMarcelo Ricardo Leitner first_len = max_data; 2131da177e4SLinus Torvalds 2145d7ff261SVlad Yasevich /* Check to see if we have a pending SACK and try to let it be bundled 2155d7ff261SVlad Yasevich * with this message. Do this if we don't have any data queued already. 2165d7ff261SVlad Yasevich * To check that, look at out_qlen and retransmit list. 2175d7ff261SVlad Yasevich * NOTE: we will not reduce to account for SACK, if the message would 2185d7ff261SVlad Yasevich * not have been fragmented. 2195d7ff261SVlad Yasevich */ 2205d7ff261SVlad Yasevich if (timer_pending(&asoc->timers[SCTP_EVENT_TIMEOUT_SACK]) && 2215d7ff261SVlad Yasevich asoc->outqueue.out_qlen == 0 && 2225d7ff261SVlad Yasevich list_empty(&asoc->outqueue.retransmit) && 223bfd2e4b8SMarcelo Ricardo Leitner msg_len > max_data) 224*d4d6c614SXin Long first_len -= SCTP_PAD4(sizeof(struct sctp_sack_chunk)); 2255d7ff261SVlad Yasevich 2261da177e4SLinus Torvalds /* Encourage Cookie-ECHO bundling. */ 2275d7ff261SVlad Yasevich if (asoc->state < SCTP_STATE_COOKIE_ECHOED) 228bfd2e4b8SMarcelo Ricardo Leitner first_len -= SCTP_ARBITRARY_COOKIE_ECHO_LEN; 2293e62abf9SVlad Yasevich 2303e62abf9SVlad Yasevich /* Account for a different sized first fragment */ 2313e62abf9SVlad Yasevich if (msg_len >= first_len) { 2320e3aef8dSVlad Yasevich msg->can_delay = 0; 233bfd2e4b8SMarcelo Ricardo Leitner SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS); 234bfd2e4b8SMarcelo Ricardo Leitner } else { 235bfd2e4b8SMarcelo Ricardo Leitner /* Which may be the only one... */ 236bfd2e4b8SMarcelo Ricardo Leitner first_len = msg_len; 2371da177e4SLinus Torvalds } 2381da177e4SLinus Torvalds 239bfd2e4b8SMarcelo Ricardo Leitner /* Create chunks for all DATA chunks. */ 240bfd2e4b8SMarcelo Ricardo Leitner for (remaining = msg_len; remaining; remaining -= len) { 241bfd2e4b8SMarcelo Ricardo Leitner u8 frag = SCTP_DATA_MIDDLE_FRAG; 2421da177e4SLinus Torvalds 243bfd2e4b8SMarcelo Ricardo Leitner if (remaining == msg_len) { 244bfd2e4b8SMarcelo Ricardo Leitner /* First frag, which may also be the last */ 2451da177e4SLinus Torvalds frag |= SCTP_DATA_FIRST_FRAG; 246bfd2e4b8SMarcelo Ricardo Leitner len = first_len; 247bfd2e4b8SMarcelo Ricardo Leitner } else { 248bfd2e4b8SMarcelo Ricardo Leitner /* Middle frags */ 249bfd2e4b8SMarcelo Ricardo Leitner len = max_data; 250bfd2e4b8SMarcelo Ricardo Leitner } 2511da177e4SLinus Torvalds 252bfd2e4b8SMarcelo Ricardo Leitner if (len >= remaining) { 253bfd2e4b8SMarcelo Ricardo Leitner /* Last frag, which may also be the first */ 254bfd2e4b8SMarcelo Ricardo Leitner len = remaining; 2551da177e4SLinus Torvalds frag |= SCTP_DATA_LAST_FRAG; 2561da177e4SLinus Torvalds 257b93d6471SWei Yongjun /* The application requests to set the I-bit of the 258b93d6471SWei Yongjun * last DATA chunk of a user message when providing 259b93d6471SWei Yongjun * the user message to the SCTP implementation. 260b93d6471SWei Yongjun */ 261b93d6471SWei Yongjun if ((sinfo->sinfo_flags & SCTP_EOF) || 262b93d6471SWei Yongjun (sinfo->sinfo_flags & SCTP_SACK_IMMEDIATELY)) 263b93d6471SWei Yongjun frag |= SCTP_DATA_SACK_IMM; 264b93d6471SWei Yongjun } 265b93d6471SWei Yongjun 266cea8768fSMarcelo Ricardo Leitner chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag, 267cea8768fSMarcelo Ricardo Leitner 0, GFP_KERNEL); 2686e51fe75STommi Rantala if (!chunk) { 2696e51fe75STommi Rantala err = -ENOMEM; 2701da177e4SLinus Torvalds goto errout; 2716e51fe75STommi Rantala } 2726e51fe75STommi Rantala 273e0eb093eSAl Viro err = sctp_user_addto_chunk(chunk, len, from); 2741da177e4SLinus Torvalds if (err < 0) 275be364c8cSTommi Rantala goto errout_chunk_free; 2761da177e4SLinus Torvalds 2771da177e4SLinus Torvalds /* Put the chunk->skb back into the form expected by send. */ 278bfd2e4b8SMarcelo Ricardo Leitner __skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr - 279bfd2e4b8SMarcelo Ricardo Leitner chunk->skb->data); 2801da177e4SLinus Torvalds 2811da177e4SLinus Torvalds sctp_datamsg_assign(msg, chunk); 2821da177e4SLinus Torvalds list_add_tail(&chunk->frag_list, &msg->chunks); 2831da177e4SLinus Torvalds } 2841da177e4SLinus Torvalds 2851da177e4SLinus Torvalds return msg; 2861da177e4SLinus Torvalds 287be364c8cSTommi Rantala errout_chunk_free: 288be364c8cSTommi Rantala sctp_chunk_free(chunk); 289be364c8cSTommi Rantala 2901da177e4SLinus Torvalds errout: 2911da177e4SLinus Torvalds list_for_each_safe(pos, temp, &msg->chunks) { 2921da177e4SLinus Torvalds list_del_init(pos); 2931da177e4SLinus Torvalds chunk = list_entry(pos, struct sctp_chunk, frag_list); 2941da177e4SLinus Torvalds sctp_chunk_free(chunk); 2951da177e4SLinus Torvalds } 29680445cfbSFlorian Westphal sctp_datamsg_put(msg); 297bfd2e4b8SMarcelo Ricardo Leitner 2986e51fe75STommi Rantala return ERR_PTR(err); 2991da177e4SLinus Torvalds } 3001da177e4SLinus Torvalds 3011da177e4SLinus Torvalds /* Check whether this message has expired. */ 3021da177e4SLinus Torvalds int sctp_chunk_abandoned(struct sctp_chunk *chunk) 3031da177e4SLinus Torvalds { 3048ae808ebSXin Long if (!chunk->asoc->peer.prsctp_capable) 3051da177e4SLinus Torvalds return 0; 3061da177e4SLinus Torvalds 307a6c2f792SXin Long if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && 3080605483fSXin Long time_after(jiffies, chunk->msg->expires_at)) { 309d229d48dSXin Long struct sctp_stream_out *streamout = 310cee360abSXin Long &chunk->asoc->stream.out[chunk->sinfo.sinfo_stream]; 311d229d48dSXin Long 312d229d48dSXin Long if (chunk->sent_count) { 313a6c2f792SXin Long chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; 314d229d48dSXin Long streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++; 315d229d48dSXin Long } else { 316a6c2f792SXin Long chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; 317d229d48dSXin Long streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; 318d229d48dSXin Long } 319a6c2f792SXin Long return 1; 32001aadb3aSXin Long } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && 3210605483fSXin Long chunk->sent_count > chunk->sinfo.sinfo_timetolive) { 322d229d48dSXin Long struct sctp_stream_out *streamout = 323cee360abSXin Long &chunk->asoc->stream.out[chunk->sinfo.sinfo_stream]; 324d229d48dSXin Long 32501aadb3aSXin Long chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; 326d229d48dSXin Long streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++; 32701aadb3aSXin Long return 1; 3288ae808ebSXin Long } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && 3298ae808ebSXin Long chunk->msg->expires_at && 3308ae808ebSXin Long time_after(jiffies, chunk->msg->expires_at)) { 3318ae808ebSXin Long return 1; 332a6c2f792SXin Long } 3338dbdf1f5SXin Long /* PRIO policy is processed by sendmsg, not here */ 334a6c2f792SXin Long 335a6c2f792SXin Long return 0; 336a6c2f792SXin Long } 337a6c2f792SXin Long 3381da177e4SLinus Torvalds /* This chunk (and consequently entire message) has failed in its sending. */ 3391da177e4SLinus Torvalds void sctp_chunk_fail(struct sctp_chunk *chunk, int error) 3401da177e4SLinus Torvalds { 3411da177e4SLinus Torvalds chunk->msg->send_failed = 1; 3421da177e4SLinus Torvalds chunk->msg->send_error = error; 3431da177e4SLinus Torvalds } 344