/*
 * ip_output.c — diff residue comparing blob 9e9fd65d1fa51d919d54d731be0e66492b5b6c5a (old)
 * with blob be9f4a44e7d41cee50ddb5f038fc2391cbbb4046 (new).
 */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) output module.
 *
 * Authors:	Ross Biro

--- 99 unchanged lines hidden (view full) ---

108 err = __ip_local_out(skb);
109 if (likely(err == 1))
110 err = dst_output(skb);
111
112 return err;
113}
114EXPORT_SYMBOL_GPL(ip_local_out);
115
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) output module.
 *
 * Authors:	Ross Biro

--- 99 unchanged lines hidden (view full) ---

108 err = __ip_local_out(skb);
109 if (likely(err == 1))
110 err = dst_output(skb);
111
112 return err;
113}
114EXPORT_SYMBOL_GPL(ip_local_out);
115
116/* dev_loopback_xmit for use with netfilter. */
117static int ip_dev_loopback_xmit(struct sk_buff *newskb)
118{
119 skb_reset_mac_header(newskb);
120 __skb_pull(newskb, skb_network_offset(newskb));
121 newskb->pkt_type = PACKET_LOOPBACK;
122 newskb->ip_summed = CHECKSUM_UNNECESSARY;
123 WARN_ON(!skb_dst(newskb));
124 skb_dst_force(newskb);
125 netif_rx_ni(newskb);
126 return 0;
127}
128
129static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
130{
131 int ttl = inet->uc_ttl;
132
133 if (ttl < 0)
134 ttl = ip4_dst_hoplimit(dst);
135 return ttl;
136}

--- 41 unchanged lines hidden (view full) ---

178
179static inline int ip_finish_output2(struct sk_buff *skb)
180{
181 struct dst_entry *dst = skb_dst(skb);
182 struct rtable *rt = (struct rtable *)dst;
183 struct net_device *dev = dst->dev;
184 unsigned int hh_len = LL_RESERVED_SPACE(dev);
185 struct neighbour *neigh;
116static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
117{
118 int ttl = inet->uc_ttl;
119
120 if (ttl < 0)
121 ttl = ip4_dst_hoplimit(dst);
122 return ttl;
123}

--- 41 unchanged lines hidden (view full) ---

165
166static inline int ip_finish_output2(struct sk_buff *skb)
167{
168 struct dst_entry *dst = skb_dst(skb);
169 struct rtable *rt = (struct rtable *)dst;
170 struct net_device *dev = dst->dev;
171 unsigned int hh_len = LL_RESERVED_SPACE(dev);
172 struct neighbour *neigh;
173 u32 nexthop;
186
187 if (rt->rt_type == RTN_MULTICAST) {
188 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
189 } else if (rt->rt_type == RTN_BROADCAST)
190 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTBCAST, skb->len);
191
192 /* Be paranoid, rather than too clever. */
193 if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
194 struct sk_buff *skb2;
195
196 skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
197 if (skb2 == NULL) {
198 kfree_skb(skb);
199 return -ENOMEM;
200 }
201 if (skb->sk)
202 skb_set_owner_w(skb2, skb->sk);
174
175 if (rt->rt_type == RTN_MULTICAST) {
176 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
177 } else if (rt->rt_type == RTN_BROADCAST)
178 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTBCAST, skb->len);
179
180 /* Be paranoid, rather than too clever. */
181 if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
182 struct sk_buff *skb2;
183
184 skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
185 if (skb2 == NULL) {
186 kfree_skb(skb);
187 return -ENOMEM;
188 }
189 if (skb->sk)
190 skb_set_owner_w(skb2, skb->sk);
203 kfree_skb(skb);
191 consume_skb(skb);
204 skb = skb2;
205 }
206
192 skb = skb2;
193 }
194
207 rcu_read_lock();
208 neigh = dst_get_neighbour_noref(dst);
195 rcu_read_lock_bh();
196 nexthop = rt->rt_gateway ? rt->rt_gateway : ip_hdr(skb)->daddr;
197 neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
198 if (unlikely(!neigh))
199 neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
209 if (neigh) {
200 if (neigh) {
210 int res = neigh_output(neigh, skb);
201 int res = dst_neigh_output(dst, neigh, skb);
211
202
212 rcu_read_unlock();
203 rcu_read_unlock_bh();
213 return res;
214 }
204 return res;
205 }
215 rcu_read_unlock();
206 rcu_read_unlock_bh();
216
217 net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
218 __func__);
219 kfree_skb(skb);
220 return -EINVAL;
221}
222
223static inline int ip_skb_dst_mtu(struct sk_buff *skb)

--- 52 unchanged lines hidden (view full) ---

276 ((rt->rt_flags & RTCF_LOCAL) ||
277 !(IPCB(skb)->flags & IPSKB_FORWARDED))
278#endif
279 ) {
280 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
281 if (newskb)
282 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
283 newskb, NULL, newskb->dev,
207
208 net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
209 __func__);
210 kfree_skb(skb);
211 return -EINVAL;
212}
213
214static inline int ip_skb_dst_mtu(struct sk_buff *skb)

--- 52 unchanged lines hidden (view full) ---

267 ((rt->rt_flags & RTCF_LOCAL) ||
268 !(IPCB(skb)->flags & IPSKB_FORWARDED))
269#endif
270 ) {
271 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
272 if (newskb)
273 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
274 newskb, NULL, newskb->dev,
284 ip_dev_loopback_xmit);
275 dev_loopback_xmit);
285 }
286
287 /* Multicasts with ttl 0 must not go beyond the host */
288
289 if (ip_hdr(skb)->ttl == 0) {
290 kfree_skb(skb);
291 return 0;
292 }
293 }
294
295 if (rt->rt_flags&RTCF_BROADCAST) {
296 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
297 if (newskb)
298 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb,
276 }
277
278 /* Multicasts with ttl 0 must not go beyond the host */
279
280 if (ip_hdr(skb)->ttl == 0) {
281 kfree_skb(skb);
282 return 0;
283 }
284 }
285
286 if (rt->rt_flags&RTCF_BROADCAST) {
287 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
288 if (newskb)
289 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb,
299 NULL, newskb->dev, ip_dev_loopback_xmit);
290 NULL, newskb->dev, dev_loopback_xmit);
300 }
301
302 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL,
303 skb->dev, ip_finish_output,
304 !(IPCB(skb)->flags & IPSKB_REROUTED));
305}
306
307int ip_output(struct sk_buff *skb)

--- 396 unchanged lines hidden (view full) ---

704 ip_send_check(iph);
705
706 err = output(skb2);
707 if (err)
708 goto fail;
709
710 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
711 }
291 }
292
293 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL,
294 skb->dev, ip_finish_output,
295 !(IPCB(skb)->flags & IPSKB_REROUTED));
296}
297
298int ip_output(struct sk_buff *skb)

--- 396 unchanged lines hidden (view full) ---

695 ip_send_check(iph);
696
697 err = output(skb2);
698 if (err)
699 goto fail;
700
701 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
702 }
712 kfree_skb(skb);
703 consume_skb(skb);
713 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
714 return err;
715
716fail:
717 kfree_skb(skb);
718 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
719 return err;
720}

--- 746 unchanged lines hidden (view full) ---

1467
1468 csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
1469 skb->csum = csum_block_add(skb->csum, csum, odd);
1470 return 0;
1471}
1472
1473/*
1474 * Generic function to send a packet as reply to another packet.
704 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
705 return err;
706
707fail:
708 kfree_skb(skb);
709 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
710 return err;
711}

--- 746 unchanged lines hidden (view full) ---

1458
1459 csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
1460 skb->csum = csum_block_add(skb->csum, csum, odd);
1461 return 0;
1462}
1463
1464/*
1465 * Generic function to send a packet as reply to another packet.
1475 * Used to send TCP resets so far. ICMP should use this function too.
1466 * Used to send some TCP resets/acks so far.
1476 *
1467 *
1477 * Should run single threaded per socket because it uses the sock
1478 * structure to pass arguments.
1468 * Use a fake percpu inet socket to avoid false sharing and contention.
1479 */
1469 */
1480void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
1481 const struct ip_reply_arg *arg, unsigned int len)
1470static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = {
1471 .sk = {
1472 .__sk_common = {
1473 .skc_refcnt = ATOMIC_INIT(1),
1474 },
1475 .sk_wmem_alloc = ATOMIC_INIT(1),
1476 .sk_allocation = GFP_ATOMIC,
1477 .sk_flags = (1UL << SOCK_USE_WRITE_QUEUE),
1478 },
1479 .pmtudisc = IP_PMTUDISC_WANT,
1480};
1481
1482void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
1483 __be32 saddr, const struct ip_reply_arg *arg,
1484 unsigned int len)
1482{
1485{
1483 struct inet_sock *inet = inet_sk(sk);
1484 struct ip_options_data replyopts;
1485 struct ipcm_cookie ipc;
1486 struct flowi4 fl4;
1487 struct rtable *rt = skb_rtable(skb);
1486 struct ip_options_data replyopts;
1487 struct ipcm_cookie ipc;
1488 struct flowi4 fl4;
1489 struct rtable *rt = skb_rtable(skb);
1490 struct sk_buff *nskb;
1491 struct sock *sk;
1492 struct inet_sock *inet;
1488
1489 if (ip_options_echo(&replyopts.opt.opt, skb))
1490 return;
1491
1492 ipc.addr = daddr;
1493 ipc.opt = NULL;
1494 ipc.tx_flags = 0;
1495
1496 if (replyopts.opt.opt.optlen) {
1497 ipc.opt = &replyopts.opt;
1498
1499 if (replyopts.opt.opt.srr)
1500 daddr = replyopts.opt.opt.faddr;
1501 }
1502
1503 flowi4_init_output(&fl4, arg->bound_dev_if, 0,
1504 RT_TOS(arg->tos),
1493
1494 if (ip_options_echo(&replyopts.opt.opt, skb))
1495 return;
1496
1497 ipc.addr = daddr;
1498 ipc.opt = NULL;
1499 ipc.tx_flags = 0;
1500
1501 if (replyopts.opt.opt.optlen) {
1502 ipc.opt = &replyopts.opt;
1503
1504 if (replyopts.opt.opt.srr)
1505 daddr = replyopts.opt.opt.faddr;
1506 }
1507
1508 flowi4_init_output(&fl4, arg->bound_dev_if, 0,
1509 RT_TOS(arg->tos),
1505 RT_SCOPE_UNIVERSE, sk->sk_protocol,
1510 RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
1506 ip_reply_arg_flowi_flags(arg),
1511 ip_reply_arg_flowi_flags(arg),
1507 daddr, rt->rt_spec_dst,
1512 daddr, saddr,
1508 tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
1509 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
1513 tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
1514 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
1510 rt = ip_route_output_key(sock_net(sk), &fl4);
1515 rt = ip_route_output_key(net, &fl4);
1511 if (IS_ERR(rt))
1512 return;
1513
1516 if (IS_ERR(rt))
1517 return;
1518
1514 /* And let IP do all the hard work.
1519 inet = &get_cpu_var(unicast_sock);
1515
1520
1516 This chunk is not reenterable, hence spinlock.
1517 Note that it uses the fact, that this function is called
1518 with locally disabled BH and that sk cannot be already spinlocked.
1519 */
1520 bh_lock_sock(sk);
1521 inet->tos = arg->tos;
1521 inet->tos = arg->tos;
1522 sk = &inet->sk;
1522 sk->sk_priority = skb->priority;
1523 sk->sk_protocol = ip_hdr(skb)->protocol;
1524 sk->sk_bound_dev_if = arg->bound_dev_if;
1523 sk->sk_priority = skb->priority;
1524 sk->sk_protocol = ip_hdr(skb)->protocol;
1525 sk->sk_bound_dev_if = arg->bound_dev_if;
1526 sock_net_set(sk, net);
1527 __skb_queue_head_init(&sk->sk_write_queue);
1528 sk->sk_sndbuf = sysctl_wmem_default;
1525 ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1526 &ipc, &rt, MSG_DONTWAIT);
1529 ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1530 &ipc, &rt, MSG_DONTWAIT);
1527 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1531 nskb = skb_peek(&sk->sk_write_queue);
1532 if (nskb) {
1528 if (arg->csumoffset >= 0)
1533 if (arg->csumoffset >= 0)
1529 *((__sum16 *)skb_transport_header(skb) +
1530 arg->csumoffset) = csum_fold(csum_add(skb->csum,
1534 *((__sum16 *)skb_transport_header(nskb) +
1535 arg->csumoffset) = csum_fold(csum_add(nskb->csum,
1531 arg->csum));
1536 arg->csum));
1532 skb->ip_summed = CHECKSUM_NONE;
1537 nskb->ip_summed = CHECKSUM_NONE;
1538 skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
1533 ip_push_pending_frames(sk, &fl4);
1534 }
1535
1539 ip_push_pending_frames(sk, &fl4);
1540 }
1541
1536 bh_unlock_sock(sk);
1542 put_cpu_var(unicast_sock);
1537
1538 ip_rt_put(rt);
1539}
1540
1541void __init ip_init(void)
1542{
1543 ip_rt_init();
1544 inet_initpeers();
1545
1546#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
1547 igmp_mc_proc_init();
1548#endif
1549}
1543
1544 ip_rt_put(rt);
1545}
1546
1547void __init ip_init(void)
1548{
1549 ip_rt_init();
1550 inet_initpeers();
1551
1552#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
1553 igmp_mc_proc_init();
1554#endif
1555}