// SPDX-License-Identifier: GPL-2.0-or-later
/*
* NET3 Protocol independent device support routines.
*
* Derived from the non IP parts of dev.c 1.0.19
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
*
* Additional Authors:
* Florian la Roche <rzsfl@rz.uni-sb.de>
* Alan Cox <gw4pts@gw4pts.ampr.org>
* David Hinds <dahinds@users.sourceforge.net>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
* Adam Sulmicki <adam@cfar.umd.edu>
* Pekka Riikonen <priikone@poesidon.pspt.fi>
*
* Changes:
* D.J. Barrow : Fixed bug where dev->refcnt gets set
* to 2 if register_netdev gets called
* before net_dev_init & also removed a
* few lines of code in the process.
* Alan Cox : device private ioctl copies fields back.
* Alan Cox : Transmit queue code does relevant
* stunts to keep the queue safe.
* Alan Cox : Fixed double lock.
* Alan Cox : Fixed promisc NULL pointer trap
* ???????? : Support the full private ioctl range
* Alan Cox : Moved ioctl permission check into
* drivers
* Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
* Alan Cox : 100 backlog just doesn't cut it when
* you start doing multicast video 8)
* Alan Cox : Rewrote net_bh and list manager.
* Alan Cox : Fix ETH_P_ALL echoback lengths.
* Alan Cox : Took out transmit every packet pass
* Saved a few bytes in the ioctl handler
* Alan Cox : Network driver sets packet type before
* calling netif_rx. Saves a function
* call a packet.
* Alan Cox : Hashed net_bh()
* Richard Kooijman: Timestamp fixes.
* Alan Cox : Wrong field in SIOCGIFDSTADDR
* Alan Cox : Device lock protection.
* Alan Cox : Fixed nasty side effect of device close
* changes.
* Rudi Cilibrasi : Pass the right thing to
* set_mac_address()
* Dave Miller : 32bit quantity for the device lock to
* make it work out on a Sparc.
* Bjorn Ekwall : Added KERNELD hack.
* Alan Cox : Cleaned up the backlog initialise.
* Craig Metz : SIOCGIFCONF fix if space for under
* 1 device.
* Thomas Bogendoerfer : Return ENODEV for dev_open, if there
* is no device open function.
* Andi Kleen : Fix error reporting for SIOCGIFCONF
* Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
* Cyrus Durgin : Cleaned for KMOD
* Adam Sulmicki : Bug Fix : Network Device Unload
* A network device unload needs to purge
* the backlog queue.
* Paul Rusty Russell : SIOCSIFNAME
* Pekka Riikonen : Netdev boot-time settings code
* Andrew Morton : Make unregister_netdevice wait
* indefinitely on dev->refcnt
* J Hadi Salim : - Backlog queue sampling
* - netif_rx() feedback
*/
#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/skbuff.h>
#include <linux/kthread.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/busy_poll.h>
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dsa.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/gro.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <net/mpls.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>
#include <trace/events/net.h>
#include <trace/events/skb.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/static_key.h>
#include <linux/hashtable.h>
#include <linux/vmalloc.h>
#include <linux/if_macvlan.h>
#include <linux/errqueue.h>
#include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>
#include <linux/crash_dump.h>
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
#include <linux/net_namespace.h>
#include <linux/indirect_call_wrapper.h>
#include <net/devlink.h>
#include <linux/pm_runtime.h>
#include <linux/prandom.h>
#include "net-sysfs.h"
#define MAX_GRO_SKBS 8
/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)
static DEFINE_SPINLOCK(ptype_lock);
static DEFINE_SPINLOCK(offload_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly; /* Taps */
static struct list_head offload_base __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
struct netdev_notifier_info *info);
static int call_netdevice_notifiers_extack(unsigned long val,
struct net_device *dev,
struct netlink_ext_ack *extack);
static struct napi_struct *napi_by_id(unsigned int napi_id);
/*
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
* semaphore.
*
* Pure readers hold dev_base_lock for reading, or rcu_read_lock()
*
* Writers must hold the rtnl semaphore while they loop through the
* dev_base_head list, and hold dev_base_lock for writing when they do the
* actual updates. This allows pure readers to access the list even
* while a writer is preparing to update it.
*
* To put it another way, dev_base_lock is held for writing only to
* protect against pure readers; the rtnl semaphore provides the
* protection against other writers.
*
* See, for example usages, register_netdevice() and
* unregister_netdevice(), which must be called with the rtnl
* semaphore held.
*/
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
static DEFINE_MUTEX(ifalias_mutex);
/* protects napi_hash addition/deletion and napi_gen_id */
static DEFINE_SPINLOCK(napi_hash_lock);
static unsigned int napi_gen_id = NR_CPUS;
static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
static DECLARE_RWSEM(devnet_rename_sem);
static inline void dev_base_seq_inc(struct net *net)
{
while (++net->dev_base_seq == 0)
;
}
static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
unsigned int hash = full_name_hash(net, name, strnlen(name, IFNAMSIZ));
return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
}
static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}
static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
spin_lock(&sd->input_pkt_queue.lock);
#endif
}
static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
spin_unlock(&sd->input_pkt_queue.lock);
#endif
}
static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
const char *name)
{
struct netdev_name_node *name_node;
name_node = kmalloc(sizeof(*name_node), GFP_KERNEL);
if (!name_node)
return NULL;
INIT_HLIST_NODE(&name_node->hlist);
name_node->dev = dev;
name_node->name = name;
return name_node;
}
static struct netdev_name_node *
netdev_name_node_head_alloc(struct net_device *dev)
{
struct netdev_name_node *name_node;
name_node = netdev_name_node_alloc(dev, dev->name);
if (!name_node)
return NULL;
INIT_LIST_HEAD(&name_node->list);
return name_node;
}
static void netdev_name_node_free(struct netdev_name_node *name_node)
{
kfree(name_node);
}
static void netdev_name_node_add(struct net *net,
struct netdev_name_node *name_node)
{
hlist_add_head_rcu(&name_node->hlist,
dev_name_hash(net, name_node->name));
}
static void netdev_name_node_del(struct netdev_name_node *name_node)
{
hlist_del_rcu(&name_node->hlist);
}
static struct netdev_name_node *netdev_name_node_lookup(struct net *net,
const char *name)
{
struct hlist_head *head = dev_name_hash(net, name);
struct netdev_name_node *name_node;
hlist_for_each_entry(name_node, head, hlist)
if (!strcmp(name_node->name, name))
return name_node;
return NULL;
}
static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net,
const char *name)
{
struct hlist_head *head = dev_name_hash(net, name);
struct netdev_name_node *name_node;
hlist_for_each_entry_rcu(name_node, head, hlist)
if (!strcmp(name_node->name, name))
return name_node;
return NULL;
}
int netdev_name_node_alt_create(struct net_device *dev, const char *name)
{
struct netdev_name_node *name_node;
struct net *net = dev_net(dev);
name_node = netdev_name_node_lookup(net, name);
if (name_node)
return -EEXIST;
name_node = netdev_name_node_alloc(dev, name);
if (!name_node)
return -ENOMEM;
netdev_name_node_add(net, name_node);
/* The node that holds dev->name acts as a head of per-device list. */
list_add_tail(&name_node->list, &dev->name_node->list);
return 0;
}
EXPORT_SYMBOL(netdev_name_node_alt_create);
static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
{
list_del(&name_node->list);
netdev_name_node_del(name_node);
kfree(name_node->name);
netdev_name_node_free(name_node);
}
int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
{
struct netdev_name_node *name_node;
struct net *net = dev_net(dev);
name_node = netdev_name_node_lookup(net, name);
if (!name_node)
return -ENOENT;
/* lookup might have found our primary name or a name belonging
* to another device.
*/
if (name_node == dev->name_node || name_node->dev != dev)
return -EINVAL;
__netdev_name_node_alt_destroy(name_node);
return 0;
}
EXPORT_SYMBOL(netdev_name_node_alt_destroy);
static void netdev_name_node_alt_flush(struct net_device *dev)
{
struct netdev_name_node *name_node, *tmp;
list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list)
__netdev_name_node_alt_destroy(name_node);
}
/* Device list insertion */
static void list_netdevice(struct net_device *dev)
{
struct net *net = dev_net(dev);
ASSERT_RTNL();
write_lock_bh(&dev_base_lock);
list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
netdev_name_node_add(net, dev->name_node);
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
write_unlock_bh(&dev_base_lock);
dev_base_seq_inc(net);
}
/* Device list removal
* caller must respect a RCU grace period before freeing/reusing dev
*/
static void unlist_netdevice(struct net_device *dev)
{
ASSERT_RTNL();
/* Unlink dev from the device chain */
write_lock_bh(&dev_base_lock);
list_del_rcu(&dev->dev_list);
netdev_name_node_del(dev->name_node);
hlist_del_rcu(&dev->index_hlist);
write_unlock_bh(&dev_base_lock);
dev_base_seq_inc(dev_net(dev));
}
/*
* Our notifier list
*/
static RAW_NOTIFIER_HEAD(netdev_chain);
/*
* Device drivers call our routines to queue packets here. We empty the
* queue in the local softnet handler.
*/
DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);
#ifdef CONFIG_LOCKDEP
/*
* register_netdevice() inits txq->_xmit_lock and sets lockdep class
* according to dev->type
*/
static const unsigned short netdev_lock_type[] = {
ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
static const char *const netdev_lock_name[] = {
"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
"_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
"_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
"_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
"_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
"_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
"_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
"_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
"_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
"_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
"_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
"_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
"_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
"_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
"_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
int i;
for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
if (netdev_lock_type[i] == dev_type)
return i;
/* the last key is used by default */
return ARRAY_SIZE(netdev_lock_type) - 1;
}
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
unsigned short dev_type)
{
int i;
i = netdev_lock_pos(dev_type);
lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
netdev_lock_name[i]);
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
int i;
i = netdev_lock_pos(dev->type);
lockdep_set_class_and_name(&dev->addr_list_lock,
&netdev_addr_lock_key[i],
netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif
/*******************************************************************************
*
* Protocol management and registration routines
*
*******************************************************************************/
/*
* Add a protocol ID to the list. Now that the input handler is
* smarter we can dispense with all the messy stuff that used to be
* here.
*
* BEWARE!!! Protocol handlers, mangling input packets,
* MUST BE last in hash buckets and checking protocol handlers
* MUST start from promiscuous ptype_all chain in net_bh.
* It is true now, do not change it.
* Explanation follows: if protocol handler, mangling packet, will
* be the first on list, it is not able to sense, that packet
* is cloned and should be copied-on-write, so that it will
* change it and subsequent readers will get broken packet.
* --ANK (980803)
*/
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
if (pt->type == htons(ETH_P_ALL))
return pt->dev ? &pt->dev->ptype_all : &ptype_all;
else
return pt->dev ? &pt->dev->ptype_specific :
&ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}
/**
* dev_add_pack - add packet handler
* @pt: packet type declaration
*
* Add a protocol handler to the networking stack. The passed &packet_type
* is linked into kernel lists and may not be freed until it has been
* removed from the kernel lists.
*
* This call does not sleep therefore it can not
* guarantee all CPU's that are in middle of receiving packets
* will see the new packet type (until the next received packet).
*/
void dev_add_pack(struct packet_type *pt)
{
struct list_head *head = ptype_head(pt);
spin_lock(&ptype_lock);
list_add_rcu(&pt->list, head);
spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);
/**
* __dev_remove_pack - remove packet handler
* @pt: packet type declaration
*
* Remove a protocol handler that was previously added to the kernel
* protocol handlers by dev_add_pack(). The passed &packet_type is removed
* from the kernel lists and can be freed or reused once this function
* returns.
*
* The packet type might still be in use by receivers
* and must not be freed until after all the CPU's have gone
* through a quiescent state.
*/
void __dev_remove_pack(struct packet_type *pt)
{
struct list_head *head = ptype_head(pt);
struct packet_type *pt1;
spin_lock(&ptype_lock);
list_for_each_entry(pt1, head, list) {
if (pt == pt1) {
list_del_rcu(&pt->list);
goto out;
}
}
pr_warn("dev_remove_pack: %p not found\n", pt);
out:
spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(__dev_remove_pack);
/**
* dev_remove_pack - remove packet handler
* @pt: packet type declaration
*
* Remove a protocol handler that was previously added to the kernel
* protocol handlers by dev_add_pack(). The passed &packet_type is removed
* from the kernel lists and can be freed or reused once this function
* returns.
*
* This call sleeps to guarantee that no CPU is looking at the packet
* type after return.
*/
void dev_remove_pack(struct packet_type *pt)
{
__dev_remove_pack(pt);
synchronize_net();
}
EXPORT_SYMBOL(dev_remove_pack);
/**
* dev_add_offload - register offload handlers
* @po: protocol offload declaration
*
* Add protocol offload handlers to the networking stack. The passed
* &proto_offload is linked into kernel lists and may not be freed until
* it has been removed from the kernel lists.
*