Compare revisions

Source commits (35)
  • allowedips: free empty intermediate nodes when removing single node · 383461db
    Jason A. Donenfeld authored
    
    When removing single nodes, it's possible that that node's parent is an
    empty intermediate node, in which case, it too should be removed.
    Otherwise the trie fills up and never is fully emptied, leading to
    gradual memory leaks over time for tries that are modified often. There
    was originally code to do this, but it was removed during refactoring in
    2016 and never reworked. Now that we have proper parent pointers from
    the previous commits, we can implement this properly.
    
    In order to reduce branching and expensive comparisons, we want to keep
    the double pointer for parent assignment (which lets us easily chain up
    to the root), but we still need to actually get the parent's base
    address. So encode the bit number into the last two bits of the pointer,
    and pack and unpack it as needed. This is a little bit clumsy, but it is
    the fastest and least memory-wasteful of the compromises; a standalone
    sketch of the packing scheme follows this entry. Note that we align
    the root struct here to a minimum of 4, because it's embedded into a
    larger struct, and we're relying on having the bottom two bits for our
    flag, which would only be 16-bit aligned on m68k.
    
    The existing macro-based helpers were a bit unwieldy for adding the bit
    packing to, so this commit replaces them with safer and clearer ordinary
    functions.
    
    We add a test to the randomized/fuzzer part of the selftests, to free
    the randomized tries by-peer, refuzz it, and repeat, until it's supposed
    to be empty, and then see if that actually resulted in the whole
    thing being emptied. That combined with kmemcheck should hopefully make
    sure this commit is doing what it should. Along the way this resulted in
    various other cleanups of the tests and fixes for recent graphviz.
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
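
    A minimal userspace sketch of the pointer-packing scheme described above
    (type and helper names here are illustrative, not the driver's): because
    the child slots are pointer-aligned, the low two bits of a slot's address
    are always zero and can carry the bit number.

      #include <assert.h>
      #include <stddef.h>
      #include <stdint.h>

      struct node {
              struct node *bit[2];             /* child slots; pointer-aligned */
              unsigned long parent_bit_packed; /* parent slot address | bit */
      };

      /* Pack: record which slot of the parent points at this child, stashing
       * the bit number in the low bits of the slot's own address.
       */
      static void pack_parent(struct node *child, struct node **slot,
                              unsigned long bit)
      {
              assert(((uintptr_t)slot & 3UL) == 0); /* low two bits must be free */
              child->parent_bit_packed = (unsigned long)slot | bit;
      }

      /* Unpack: recover the slot address, then step back to the parent base. */
      static struct node **unpack_slot(const struct node *child)
      {
              return (struct node **)(child->parent_bit_packed & ~3UL);
      }

      static struct node *unpack_parent(const struct node *child)
      {
              unsigned long bit = child->parent_bit_packed & 1;

              return (struct node *)((char *)unpack_slot(child) -
                                     offsetof(struct node, bit[0]) -
                                     bit * sizeof(struct node *));
      }

    In the commit itself, the value 2 in those low bits marks a node hanging
    directly off a trie root, which is why struct allowedips gains an
    __aligned(4) annotation in the diff below.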
  • allowedips: add missing __rcu annotation to satisfy sparse · fd7a4621
    Jason A. Donenfeld authored
    
    A __rcu annotation got lost during refactoring, which caused sparse to
    become enraged.
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • qemu: add disgusting hacks for RHEL 8 · 8f4414d3
    Jason A. Donenfeld authored
    
    Red Hat does awful things to their kernel for RHEL 8, such that it
    doesn't even compile in most configurations. This is utter craziness,
    and their response to me sending patches to fix this stuff has been to
    stonewall for months on end and then do nothing.
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • qemu: increase default dmesg log size · fb4a0da6
    Jason A. Donenfeld authored
    
    The selftests currently parse the kernel log at the end to track
    potential memory leaks. With these tests now reading off the end of the
    buffer, due to recent optimizations, some creation messages were lost,
    making the tests think that there was a free without an alloc. Fix this
    by increasing the kernel log size.
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • version: bump · d378f930
    Jason A. Donenfeld authored
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • 50dda8ce
  • compat: account for grsecurity backports and changes · 29747255
    Mathias Krause authored
    
    grsecurity kernels tend to carry additional backports and changes, like
    commit b60b87fc2996 ("netlink: add ethernet address policy types") or
    the SYM_FUNC_* changes. RAP nowadays hooks the latter, therefore no
    diversion to RAP_ENTRY is needed any more.
    
    Instead of relying on the kernel version test alone, also test that the
    macros we're about to define aren't already defined. This accounts for
    the additional changes in the grsecurity patch without breaking
    compatibility with the older public ones.
    
    Also test for CONFIG_PAX instead of RAP_PLUGIN for the timer API related
    changes as these don't depend on the RAP plugin to be enabled but just a
    PaX/grsecurity patch to be applied. While there is no preprocessor knob
    for the latter, use CONFIG_PAX as this will likely be enabled in every
    kernel that uses the patch.
    
    Signed-off-by: Mathias Krause <minipli@grsecurity.net>
    [zx2c4: small changes to include a header nearby a macro def test]
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • compat: update for RHEL 8.5 · 2715e641
    Peter Georg authored
    
    RHEL 8.5 has been released. Replace all ISCENTOS8S checks with ISRHEL8.
    Increase RHEL_MINOR for CentOS 8 Stream detection to 6.
    
    Signed-off-by: Peter Georg <peter.georg@physik.uni-regensburg.de>
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • netns: actually test for routing loops · cb001d45
    Jason A. Donenfeld authored
    
    We previously removed the restriction on looping to self, and then added
    a test to make sure the kernel didn't blow up during a routing loop. The
    kernel didn't blow up, thankfully, but on certain architectures where
    skb fragmentation is easier, such as ppc64, the skbs weren't actually
    being discarded after a few rounds through. But the test wasn't catching
    this. So actually test explicitly for massive increases in tx to see if
    we have a routing loop. Note that the actual loop problem will need to
    be addressed in a different commit.
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • main: rename 'mod_init' & 'mod_exit' functions to be module-specific · ea3f5fbe
    Randy Dunlap authored
    
    Rename the module_init & module_exit functions that were named
    "mod_init" and "mod_exit", so that they are unique in both the
    System.map file and in initcall_debug output, instead of showing up as
    the almost anonymous "mod_init"; the pattern is sketched after this
    entry.
    
    This is helpful for debugging and in determining how long certain
    module_init calls take to execute.
    
    Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
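
    For illustration, a hedged sketch of the rename (setup bodies elided;
    only the wg_ prefix is taken from the patch's subject area):

      #include <linux/init.h>
      #include <linux/module.h>

      /* Before: static int __init mod_init(void) -- one of many identically
       * named initcalls in System.map and initcall_debug output.
       */
      static int __init wg_mod_init(void)
      {
              return 0; /* real setup elided */
      }

      static void __exit wg_mod_exit(void)
      {
      }

      /* Registration is unchanged; only the symbol names become unique. */
      module_init(wg_mod_init);
      module_exit(wg_mod_exit);
      MODULE_LICENSE("GPL");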
  • device: reset peer src endpoint when netns exits · 68abb1b9
    Jason A. Donenfeld authored
    
    Each peer's endpoint contains a dst_cache entry that takes a reference
    to another netdev. When the containing namespace exits, we take down the
    socket and prevent future sockets from being created (by setting
    creating_net to NULL), which removes that potential reference on the
    netns. However, it doesn't release references to the netns that a netdev
    cached in dst_cache might be taking, so the netns still might fail to
    exit. Since the socket is gimped anyway, we can simply clear all the
    dst_caches (by way of clearing the endpoint src), which will release all
    references.
    
    However, the current dst_cache_reset function only releases those
    references lazily. But it turns out that all of our usages of
    wg_socket_clear_peer_endpoint_src are called from contexts that are not
    exactly high-speed or bottle-necked. For example, when there's
    connection difficulty, or when userspace is reconfiguring the interface.
    And in particular for this patch, when the netns is exiting. So for
    those cases, it makes more sense to call dst_release immediately. For
    that, we add a small helper function to dst_cache.
    
    This patch also adds a test to netns.sh from Hangbin Liu to ensure this
    doesn't regress.
    
    Tested-by: Hangbin Liu <liuhangbin@gmail.com>
    Reported-by: Xiumei Mu <xmu@redhat.com>
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • receive: use ring buffer for incoming handshakes · 5707d38f
    Jason A. Donenfeld authored
    
    Apparently the spinlock on incoming_handshake's skb_queue is highly
    contended, and a torrent of handshake or cookie packets can bring the
    data plane to its knees, simply by virtue of enqueueing the handshake
    packets to be processed asynchronously. So, we try switching this to a
    ring buffer, sketched after this entry, to hopefully have less lock
    contention. This alleviates the
    problem somewhat, though it still isn't perfect, so future patches will
    have to improve this further. However, it at least doesn't completely
    diminish the data plane.
    
    Reported-by: Streun Fabio <fstreun@student.ethz.ch>
    Reported-by: Joel Wanner <joel.wanner@inf.ethz.ch>
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
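
    A rough sketch of the shape of this change, assuming the kernel's
    ptr_ring API (the ring, capacity, and function names below are
    placeholders): packets are produced into a fixed-size ring instead of an
    skb_queue guarded by one hot spinlock.

      #include <linux/ptr_ring.h>
      #include <linux/skbuff.h>

      #define MAX_QUEUED_INCOMING_HANDSHAKES 4096 /* illustrative capacity */

      static struct ptr_ring handshake_ring;

      static int handshake_ring_init(void)
      {
              return ptr_ring_init(&handshake_ring,
                                   MAX_QUEUED_INCOMING_HANDSHAKES, GFP_KERNEL);
      }

      /* RX path: enqueue for asynchronous processing; drop when full. */
      static void enqueue_handshake(struct sk_buff *skb)
      {
              if (unlikely(ptr_ring_produce_bh(&handshake_ring, skb)))
                      dev_kfree_skb(skb); /* ring full: shed load */
      }

      /* Worker: drain the ring and handle each handshake message. */
      static void handshake_worker(void)
      {
              struct sk_buff *skb;

              while ((skb = ptr_ring_consume_bh(&handshake_ring)) != NULL)
                      consume_skb(skb); /* stand-in for real processing */
      }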
  • receive: drop handshakes if queue lock is contended · e44c78cb
    Jason A. Donenfeld authored
    
    If we're being delivered packets from multiple CPUs so quickly that the
    ring lock is contended across CPUs' enqueue attempts, then it's safe to
    assume that the queue is near capacity anyway, so just drop the packet
    rather than spinning; the trylock pattern is sketched after this entry.
    This helps deal with multicore DoS that can interfere with data path
    performance. It _still_ does not completely fix the issue, but it again
    chips away at it.
    
    Reported-by: Streun Fabio <fstreun@student.ethz.ch>
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
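
    The idea, sketched against the same hypothetical ring as above: treat a
    contended producer lock as a full-queue signal and drop immediately.

      #include <linux/ptr_ring.h>
      #include <linux/skbuff.h>
      #include <linux/spinlock.h>

      /* A contended producer lock implies the ring is busy or near capacity,
       * so drop rather than spin; handshake floods then cannot steal cycles
       * from the data plane.
       */
      static void enqueue_handshake_nonblocking(struct ptr_ring *ring,
                                                struct sk_buff *skb)
      {
              if (unlikely(!spin_trylock_bh(&ring->producer_lock))) {
                      dev_kfree_skb(skb);
                      return;
              }
              if (unlikely(__ptr_ring_produce(ring, skb)))
                      dev_kfree_skb(skb); /* full ring: also drop */
              spin_unlock_bh(&ring->producer_lock);
      }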
  • ratelimiter: use kvcalloc() instead of kvzalloc() · 5325bc82
    Gustavo A. R. Silva authored
    
    Use the 2-factor argument form kvcalloc() instead of kvzalloc(); the
    difference is sketched after this entry.
    
    Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
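
    The difference, in a hedged sketch (caller names invented): the
    open-coded multiplication can overflow, while the 2-factor form checks
    it and returns NULL instead.

      #include <linux/slab.h>

      /* Before: nothing guards the n * size multiplication. */
      static void *make_table_old(size_t n)
      {
              return kvzalloc(n * sizeof(u64), GFP_KERNEL);
      }

      /* After: kvcalloc() fails cleanly if n * size would overflow. */
      static void *make_table_new(size_t n)
      {
              return kvcalloc(n, sizeof(u64), GFP_KERNEL);
      }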
  • compat: siphash: use _unaligned version by default · ea6b8e7b
    Arnd Bergmann authored
    
    On ARM v6 and later, we define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
    because the ordinary load/store instructions (ldr, ldrh, ldrb) can
    tolerate any misalignment of the memory address. However, load/store
    double and load/store multiple instructions (ldrd, ldm) may still only
    be used on memory addresses that are 32-bit aligned, and so we have to
    use the CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS macro with care, or we
    may end up with a severe performance hit due to alignment traps that
    require fixups by the kernel. Testing shows that this currently happens
    with clang-13 but not gcc-11. In theory, any compiler version can
    produce this bug or other problems, as we are dealing with undefined
    behavior in C99 even on architectures that support this in hardware,
    see also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100363.
    
    Fortunately, the get_unaligned() accessors do the right thing: when
    building for ARMv6 or later, the compiler will emit unaligned accesses
    using the ordinary load/store instructions (but avoid the ones that
    require 32-bit alignment). When building for older ARM, those accessors
    will emit the appropriate sequence of ldrb/mov/orr instructions. And on
    architectures that can truly tolerate any kind of misalignment, the
    get_unaligned() accessors resolve to the leXX_to_cpup accessors that
    operate on aligned addresses; a small sketch follows this entry.
    
    Since the compiler will in fact emit ldrd or ldm instructions when
    building this code for ARM v6 or later, the solution is to use the
    unaligned accessors unconditionally on architectures where this is
    known to be fast. The _aligned version of the hash function is
    however still needed to get the best performance on architectures
    that cannot do any unaligned access in hardware.
    
    This new version avoids the undefined behavior and should produce
    the fastest hash on all architectures we support.
    
    Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
    Signed-off-by: Arnd Bergmann <arnd@arndb.de>
    Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com>
    Acked-by: Ard Biesheuvel <ardb@kernel.org>
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
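
    A sketch of the dispatch this relies on (simplified; the header location
    varies by kernel version):

      #include <asm/unaligned.h> /* <linux/unaligned.h> on newer kernels */
      #include <linux/types.h>

      /* Portable little-endian load from a possibly misaligned pointer.
       * On ARMv6+ the compiler emits plain ldr/ldrb sequences (never the
       * ldrd/ldm forms that trap on misalignment); on older ARM it falls
       * back to byte loads; on x86 it is a single mov.
       */
      static inline u64 load_le64_any_alignment(const void *p)
      {
              return get_unaligned_le64(p);
      }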
  • compat: udp_tunnel: don't take reference to non-init namespace · 8e40dd62
    Jason A. Donenfeld authored
    
    The comment to sk_change_net is instructive:
    
      Kernel sockets, f.e. rtnl or icmp_socket, are a part of a namespace.
      They should not hold a reference to a namespace in order to allow
      to stop it.
      Sockets after sk_change_net should be released using sk_release_kernel
    
    We weren't following these rules before, and were instead using
    __sock_create, which means we kept a reference to the namespace, which
    in turn meant that interfaces were not cleaned up on namespace
    exit.
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • crypto: curve25519-x86_64: solve register constraints with reserved registers · 3c9f3b69
    Mathias Krause authored
    
    The register constraints for the inline assembly in fsqr() and fsqr2()
    are pretty tight on what the compiler may assign to the remaining three
    register variables. The clobber list only allows the following to be
    used: RDI, RSI, RBP and R12. With RAP reserving R12 and a kernel having
    CONFIG_FRAME_POINTER=y, claiming RBP, there are only two registers left
    so the compiler rightfully complains about impossible constraints.
    
    Provide alternatives that'll allow a memory reference for 'out' to solve
    the allocation constraint dilemma for this configuration; the
    alternative-constraint mechanism is sketched after this entry.
    
    Also make 'out' an input-only operand as it is only used as such. This
    not only allows gcc to optimize its usage further, but also works around
    older gcc versions, apparently failing to handle multiple alternatives
    correctly, as in failing to initialize the 'out' operand with its input
    value.
    
    Signed-off-by: Mathias Krause <minipli@grsecurity.net>
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
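
    A toy x86-64 example of the alternative-constraint mechanism involved
    (deliberately unrelated to the actual curve25519 assembly): each
    comma-separated alternative must line up across all operands, letting
    gcc fall back to a memory operand when registers run out.

      #include <stdint.h>

      static inline uint64_t add_u64(uint64_t a, uint64_t b)
      {
              /* Alternative 0: both operands in registers.
               * Alternative 1: the source comes from memory.
               */
              asm("addq %1, %0"
                  : "+r,r"(a)
                  : "r,m"(b));
              return a;
      }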
  • version: bump · 743eef23
    Jason A. Donenfeld authored
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • compat: drop Ubuntu 14.04 · 4f4c0198
    Jason A. Donenfeld authored
    
    It's been over a year since we announced sunsetting this.
    
    Link: https://lore.kernel.org/wireguard/CAHmME9rckipsdZYW+LA=x6wCMybdFFA+VqoogFXnR=kHYiCteg@mail.gmail.com/T
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • crypto: curve25519-x86_64: use in/out register constraints more precisely · 273018b7
    Jason A. Donenfeld authored
    
    Rather than passing all variables as modified, pass ones that are only
    read into that parameter. This helps with old gcc versions when
    alternatives are additionally used, and lets gcc's codegen be a little
    bit more efficient. This also syncs up with the latest Vale/EverCrypt
    output.
    
    This also forward ports 3c9f3b69 ("crypto: curve25519-x86_64: solve
    register constraints with reserved registers").
    
    Cc: Aymeric Fromherz <aymeric.fromherz@inria.fr>
    Cc: Mathias Krause <minipli@grsecurity.net>
    Link: https://lore.kernel.org/wireguard/1554725710.1290070.1639240504281.JavaMail.zimbra@inria.fr/
    Link: https://github.com/project-everest/hacl-star/pull/501
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • queueing: use CFI-safe ptr_ring cleanup function · 4eff63d2
    Jason A. Donenfeld authored
    
    We make too nuanced use of ptr_ring to entirely move to the skb_array
    wrappers, but we at least should avoid the naughty function pointer cast
    when cleaning up skbs. Otherwise RAP/CFI will honk at us. This patch
    uses the __skb_array_destroy_skb wrapper for the cleanup, rather than
    directly providing kfree_skb, which is what other drivers in the same
    situation do too; the mismatch is sketched after this entry.
    
    Reported-by: PaX Team <pageexec@freemail.hu>
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
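
    The mismatch, sketched: ptr_ring_cleanup() wants a void (*)(void *)
    destructor, and casting kfree_skb(), which takes a struct sk_buff *, to
    that type is precisely the prototype change RAP/CFI rejects.

      #include <linux/ptr_ring.h>
      #include <linux/skbuff.h>

      /* Naughty, traps under CFI:
       *      ptr_ring_cleanup(ring, (void (*)(void *))kfree_skb);
       */

      /* Safe: a wrapper whose type matches the callback exactly, which is
       * what the __skb_array_destroy_skb wrapper amounts to.
       */
      static void destroy_skb_ptr(void *ptr)
      {
              kfree_skb(ptr);
      }

      static void cleanup_handshake_ring(struct ptr_ring *ring)
      {
              ptr_ring_cleanup(ring, destroy_skb_ptr);
      }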
  • qemu: simplify RNG seeding · ffb8cd62
    Jason A. Donenfeld authored
    
    We don't actually need to write anything in the pool. Instead, we just
    force the total over 128, and we should be good to go for all old
    kernels. We also only need this on getrandom() kernels, which simplifies
    things too.
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • socket: free skb in send6 when ipv6 is disabled · fa32671b
    Wang Hai authored
    
    I got a memory leak report:
    
    unreferenced object 0xffff8881191fc040 (size 232):
      comm "kworker/u17:0", pid 23193, jiffies 4295238848 (age 3464.870s)
      hex dump (first 32 bytes):
        00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
        00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
      backtrace:
        [<ffffffff814c3ef4>] slab_post_alloc_hook+0x84/0x3b0
        [<ffffffff814c8977>] kmem_cache_alloc_node+0x167/0x340
        [<ffffffff832974fb>] __alloc_skb+0x1db/0x200
        [<ffffffff82612b5d>] wg_socket_send_buffer_to_peer+0x3d/0xc0
        [<ffffffff8260e94a>] wg_packet_send_handshake_initiation+0xfa/0x110
        [<ffffffff8260ec81>] wg_packet_handshake_send_worker+0x21/0x30
        [<ffffffff8119c558>] process_one_work+0x2e8/0x770
        [<ffffffff8119ca2a>] worker_thread+0x4a/0x4b0
        [<ffffffff811a88e0>] kthread+0x120/0x160
        [<ffffffff8100242f>] ret_from_fork+0x1f/0x30
    
    In wg_socket_send_buffer_as_reply_to_skb() and
    wg_socket_send_buffer_to_peer(), the semantics of send6() require it to
    free the skb, but when CONFIG_IPV6 is disabled, the kfree_skb() call is
    missing. This patch adds it to fix this bug; the shape of the fix is
    sketched after this entry.
    
    Signed-off-by: Wang Hai <wanghai38@huawei.com>
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
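
    The shape of the fix, as a hedged sketch (the real send6() takes more
    arguments and does actual routing): the CONFIG_IPV6=n stub must uphold
    the contract that send6() consumes the skb on every path.

      #include <linux/errno.h>
      #include <linux/skbuff.h>

      static int send6(struct sk_buff *skb)
      {
      #if IS_ENABLED(CONFIG_IPV6)
              /* ... real transmit path, which consumes skb ... */
              return 0;
      #else
              kfree_skb(skb); /* previously missing: the skb leaked here */
              return -EAFNOSUPPORT;
      #endif
      }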
  • socket: ignore v6 endpoints when ipv6 is disabled · ec89ca64
    Jason A. Donenfeld authored
    
    The previous commit fixed a memory leak on the send path in the event
    that IPv6 is disabled at compile time, but how did a packet even arrive
    there to begin with? It turns out we have previously allowed IPv6
    endpoints even when IPv6 support is disabled at compile time. This is
    awkward and inconsistent. Instead, let's just ignore all things IPv6,
    the same way we do other malformed endpoints, in the case where IPv6 is
    disabled; the check is sketched after this entry.
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
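
    In sketch form (simplified well past the real endpoint parsing): treat
    AF_INET6 like any other unusable address family whenever the kernel was
    built without IPv6.

      #include <linux/errno.h>
      #include <linux/in6.h>
      #include <linux/socket.h>

      static int check_endpoint_family(const struct sockaddr *addr)
      {
              if (addr->sa_family == AF_INET)
                      return 0;
              if (addr->sa_family == AF_INET6 && IS_ENABLED(CONFIG_IPV6))
                      return 0;
              return -EADDRNOTAVAIL; /* malformed or unsupported: ignore it */
      }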
  • qemu: enable ACPI for SMP · f909532a
    Jason A. Donenfeld authored
    
    It turns out that by having CONFIG_ACPI=n, we've been failing to boot
    additional CPUs, and so these systems were functionally UP. The code
    bloat is unfortunate for build times, but I don't see an alternative. So
    this commit sets CONFIG_ACPI=y for x86_64 and i686 configs.
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • device: check for metadata_dst with skb_valid_dst() · f9d9b4db
    Nikolay Aleksandrov authored
    
    When we try to transmit an skb with md_dst attached through wireguard
    we hit a null pointer dereference in wg_xmit() due to the use of
    dst_mtu() which calls into dst_blackhole_mtu() which in turn tries to
    dereference dst->dev.
    
    Since wireguard doesn't use md_dsts, we should use skb_valid_dst(),
    which checks for the DST_METADATA flag and, if it is set, falls back to
    wireguard's device mtu (sketched after this entry). That gives us the
    best chance of transmitting the packet; otherwise, if the blackhole
    netdev is used, we'd get ETH_MIN_MTU.
    
     [  263.693506] BUG: kernel NULL pointer dereference, address: 00000000000000e0
     [  263.693908] #PF: supervisor read access in kernel mode
     [  263.694174] #PF: error_code(0x0000) - not-present page
     [  263.694424] PGD 0 P4D 0
     [  263.694653] Oops: 0000 [#1] PREEMPT SMP NOPTI
     [  263.694876] CPU: 5 PID: 951 Comm: mausezahn Kdump: loaded Not tainted 5.18.0-rc1+ #522
     [  263.695190] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1.fc35 04/01/2014
     [  263.695529] RIP: 0010:dst_blackhole_mtu+0x17/0x20
     [  263.695770] Code: 00 00 00 0f 1f 44 00 00 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 8b 47 10 48 83 e0 fc 8b 40 04 85 c0 75 09 48 8b 07 <8b> 80 e0 00 00 00 c3 66 90 0f 1f 44 00 00 48 89 d7 be 01 00 00 00
     [  263.696339] RSP: 0018:ffffa4a4422fbb28 EFLAGS: 00010246
     [  263.696600] RAX: 0000000000000000 RBX: ffff8ac9c3553000 RCX: 0000000000000000
     [  263.696891] RDX: 0000000000000401 RSI: 00000000fffffe01 RDI: ffffc4a43fb48900
     [  263.697178] RBP: ffffa4a4422fbb90 R08: ffffffff9622635e R09: 0000000000000002
     [  263.697469] R10: ffffffff9b69a6c0 R11: ffffa4a4422fbd0c R12: ffff8ac9d18b1a00
     [  263.697766] R13: ffff8ac9d0ce1840 R14: ffff8ac9d18b1a00 R15: ffff8ac9c3553000
     [  263.698054] FS:  00007f3704c337c0(0000) GS:ffff8acaebf40000(0000) knlGS:0000000000000000
     [  263.698470] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
     [  263.698826] CR2: 00000000000000e0 CR3: 0000000117a5c000 CR4: 00000000000006e0
     [  263.699214] Call Trace:
     [  263.699505]  <TASK>
     [  263.699759]  wg_xmit+0x411/0x450
     [  263.700059]  ? bpf_skb_set_tunnel_key+0x46/0x2d0
     [  263.700382]  ? dev_queue_xmit_nit+0x31/0x2b0
     [  263.700719]  dev_hard_start_xmit+0xd9/0x220
     [  263.701047]  __dev_queue_xmit+0x8b9/0xd30
     [  263.701344]  __bpf_redirect+0x1a4/0x380
     [  263.701664]  __dev_queue_xmit+0x83b/0xd30
     [  263.701961]  ? packet_parse_headers+0xb4/0xf0
     [  263.702275]  packet_sendmsg+0x9a8/0x16a0
     [  263.702596]  ? _raw_spin_unlock_irqrestore+0x23/0x40
     [  263.702933]  sock_sendmsg+0x5e/0x60
     [  263.703239]  __sys_sendto+0xf0/0x160
     [  263.703549]  __x64_sys_sendto+0x20/0x30
     [  263.703853]  do_syscall_64+0x3b/0x90
     [  263.704162]  entry_SYSCALL_64_after_hwframe+0x44/0xae
     [  263.704494] RIP: 0033:0x7f3704d50506
     [  263.704789] Code: 48 c7 c0 ff ff ff ff eb b7 66 2e 0f 1f 84 00 00 00 00 00 90 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 11 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 72 c3 90 55 48 83 ec 30 44 89 4c 24 2c 4c 89
     [  263.705652] RSP: 002b:00007ffe954b0b88 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
     [  263.706141] RAX: ffffffffffffffda RBX: 0000558bb259b490 RCX: 00007f3704d50506
     [  263.706544] RDX: 000000000000004a RSI: 0000558bb259b7b2 RDI: 0000000000000003
     [  263.706952] RBP: 0000000000000000 R08: 00007ffe954b0b90 R09: 0000000000000014
     [  263.707339] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffe954b0b90
     [  263.707735] R13: 000000000000004a R14: 0000558bb259b7b2 R15: 0000000000000001
     [  263.708132]  </TASK>
     [  263.708398] Modules linked in: bridge netconsole bonding [last unloaded: bridge]
     [  263.708942] CR2: 00000000000000e0
    
    Link: https://github.com/cilium/cilium/issues/19428
    
    Reported-by: Martynas Pumputis <m@lambda.lt>
    Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
    Acked-by: Daniel Borkmann <daniel@iogearbox.net>
    [Jason: polyfilled for < 4.3]
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
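
    The crash-avoiding mtu selection, sketched (simplified from wg_xmit();
    the helper name is invented):

      #include <linux/netdevice.h>
      #include <net/dst.h>
      #include <net/dst_metadata.h> /* skb_valid_dst() */

      static unsigned int wg_pick_mtu(struct net_device *dev,
                                      struct sk_buff *skb)
      {
              /* A metadata dst has no real dst->dev, so dst_mtu() would end
               * up dereferencing NULL in dst_blackhole_mtu(); fall back to
               * the wireguard device's own MTU instead.
               */
              if (skb_valid_dst(skb))
                      return dst_mtu(skb_dst(skb));
              return dev->mtu;
      }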
  • netns: make routing loop test non-fatal · f8886735
    Jason A. Donenfeld authored
    
    I hate to do this, but I still do not have a good solution to actually
    fix this bug across architectures. So just disable it for now, so that
    the CI can still deliver actionable results. This commit adds a large
    red warning, so that at least the failure isn't lost forever, and
    hopefully this can be revisited down the line.
    
    Link: https://lore.kernel.org/netdev/CAHmME9pv1x6C4TNdL6648HydD8r+txpV4hTUXOBVkrapBXH4QQ@mail.gmail.com/
    Link: https://lore.kernel.org/netdev/YmszSXueTxYOC41G@zx2c4.com/
    Link: https://lore.kernel.org/wireguard/CAHmME9rNnBiNvBstb7MPwK-7AmAN0sOfnhdR=eeLrowWcKxaaQ@mail.gmail.com/
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • netns: limit parallelism to $(nproc) tests at once · 894152a5
    Jason A. Donenfeld authored
    
    The parallel tests were added to catch queueing issues from multiple
    cores. But what happens in reality when testing tons of processes is
    that these separate threads wind up fighting with the scheduler, and we
    wind up with contention in places we don't care about that decrease the
    chances of hitting a bug. So just do a test with the number of CPU
    cores, rather than trying to scale up arbitrarily.
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • qemu: use vports on arm · 33c87a11
    Jason A. Donenfeld authored
    
    Rather than having to hack up QEMU, just use the virtio serial device.
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • qemu: set panic_on_warn=1 from cmdline · c7560fd0
    Jason A. Donenfeld authored
    
    Rather than setting this once init is running, set panic_on_warn from
    the kernel command line, so that it catches splats from WireGuard
    initialization code and the various crypto selftests.
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • qemu: give up on RHEL8 in CI · ba45dd6f
    Jason A. Donenfeld authored
    
    They keep breaking their kernel and being difficult when I send patches
    to fix it, so just give up on trying to support this in the CI. It'll
    bitrot and people will complain and we'll see what happens at that
    point.
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • 3ec3e822
  • version: bump · 18fbcd68
    Jason A. Donenfeld authored
    
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • compat: do not backport ktime_get_coarse_boottime_ns to c8s · 99935b07
    Jason A. Donenfeld authored
    
    Also bump the c8s version stamp.
    
    Reported-by: Vladimír Beneš <vbenes@redhat.com>
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • compat: drop CentOS 8 Stream support · 3d3c92b4
    Jason A. Donenfeld authored
    
    Nobody uses this and it's impossible to maintain given the current CI
    situation.
    
    RHEL 7 and 8 release remain for now, though that might not always be the
    case. See the link for details.
    
    Link: https://lists.zx2c4.com/pipermail/wireguard/2022-June/007664.html
    
    Suggested-by: Philip J. Perry <phil@elrepo.org>
    Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>

Showing 759 additions and 452 deletions

@@ -30,8 +30,11 @@ static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src,
 	node->bitlen = bits;
 	memcpy(node->bits, src, bits / 8U);
 }
 
-#define CHOOSE_NODE(parent, key) \
-	parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
+static inline u8 choose(struct allowedips_node *node, const u8 *key)
+{
+	return (key[node->bit_at_a] >> node->bit_at_b) & 1;
+}
 
 static void push_rcu(struct allowedips_node **stack,
 		     struct allowedips_node __rcu *p, unsigned int *len)
@@ -112,7 +115,7 @@ static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits,
 			found = node;
 		if (node->cidr == bits)
 			break;
-		node = rcu_dereference_bh(CHOOSE_NODE(node, key));
+		node = rcu_dereference_bh(node->bit[choose(node, key)]);
 	}
 	return found;
 }
@@ -144,8 +147,7 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
 			   u8 cidr, u8 bits, struct allowedips_node **rnode,
 			   struct mutex *lock)
 {
-	struct allowedips_node *node = rcu_dereference_protected(trie,
-						lockdep_is_held(lock));
+	struct allowedips_node *node = rcu_dereference_protected(trie, lockdep_is_held(lock));
 	struct allowedips_node *parent = NULL;
 	bool exact = false;
@@ -155,13 +157,24 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
 			exact = true;
 			break;
 		}
-		node = rcu_dereference_protected(CHOOSE_NODE(parent, key),
-						 lockdep_is_held(lock));
+		node = rcu_dereference_protected(parent->bit[choose(parent, key)], lockdep_is_held(lock));
 	}
 	*rnode = parent;
 	return exact;
 }
 
+static inline void connect_node(struct allowedips_node __rcu **parent, u8 bit, struct allowedips_node *node)
+{
+	node->parent_bit_packed = (unsigned long)parent | bit;
+	rcu_assign_pointer(*parent, node);
+}
+
+static inline void choose_and_connect_node(struct allowedips_node *parent, struct allowedips_node *node)
+{
+	u8 bit = choose(parent, node->bits);
+	connect_node(&parent->bit[bit], bit, node);
+}
+
 static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
 	       u8 cidr, struct wg_peer *peer, struct mutex *lock)
 {
@@ -177,8 +190,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
 		RCU_INIT_POINTER(node->peer, peer);
 		list_add_tail(&node->peer_list, &peer->allowedips_list);
 		copy_and_assign_cidr(node, key, cidr, bits);
-		rcu_assign_pointer(node->parent_bit, trie);
-		rcu_assign_pointer(*trie, node);
+		connect_node(trie, 2, node);
 		return 0;
 	}
 	if (node_placement(*trie, key, cidr, bits, &node, lock)) {
@@ -197,10 +209,10 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
 	if (!node) {
 		down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
 	} else {
-		down = rcu_dereference_protected(CHOOSE_NODE(node, key), lockdep_is_held(lock));
+		const u8 bit = choose(node, key);
+		down = rcu_dereference_protected(node->bit[bit], lockdep_is_held(lock));
 		if (!down) {
-			rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, key));
-			rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
+			connect_node(&node->bit[bit], bit, newnode);
 			return 0;
 		}
 	}
@@ -208,15 +220,11 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
 		parent = node;
 
 	if (newnode->cidr == cidr) {
-		rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(newnode, down->bits));
-		rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
-		if (!parent) {
-			rcu_assign_pointer(newnode->parent_bit, trie);
-			rcu_assign_pointer(*trie, newnode);
-		} else {
-			rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(parent, newnode->bits));
-			rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), newnode);
-		}
+		choose_and_connect_node(newnode, down);
+		if (!parent)
+			connect_node(trie, 2, newnode);
+		else
+			choose_and_connect_node(parent, newnode);
 		return 0;
 	}
@@ -229,17 +237,12 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
 	INIT_LIST_HEAD(&node->peer_list);
 	copy_and_assign_cidr(node, newnode->bits, cidr, bits);
 
-	rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(node, down->bits));
-	rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
-	rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, newnode->bits));
-	rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
-	if (!parent) {
-		rcu_assign_pointer(node->parent_bit, trie);
-		rcu_assign_pointer(*trie, node);
-	} else {
-		rcu_assign_pointer(node->parent_bit, &CHOOSE_NODE(parent, node->bits));
-		rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), node);
-	}
+	choose_and_connect_node(node, down);
+	choose_and_connect_node(node, newnode);
+	if (!parent)
+		connect_node(trie, 2, node);
+	else
+		choose_and_connect_node(parent, node);
 	return 0;
 }
@@ -297,7 +300,8 @@ int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
 void wg_allowedips_remove_by_peer(struct allowedips *table,
 				  struct wg_peer *peer, struct mutex *lock)
 {
-	struct allowedips_node *node, *child, *tmp;
+	struct allowedips_node *node, *child, **parent_bit, *parent, *tmp;
+	bool free_parent;
 
 	if (list_empty(&peer->allowedips_list))
 		return;
@@ -307,19 +311,29 @@ void wg_allowedips_remove_by_peer(struct allowedips *table,
 		RCU_INIT_POINTER(node->peer, NULL);
 		if (node->bit[0] && node->bit[1])
 			continue;
-		child = rcu_dereference_protected(
-				node->bit[!rcu_access_pointer(node->bit[0])],
-				lockdep_is_held(lock));
+		child = rcu_dereference_protected(node->bit[!rcu_access_pointer(node->bit[0])],
+						  lockdep_is_held(lock));
 		if (child)
-			child->parent_bit = node->parent_bit;
-		*rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child;
+			child->parent_bit_packed = node->parent_bit_packed;
+		parent_bit = (struct allowedips_node **)(node->parent_bit_packed & ~3UL);
+		*parent_bit = child;
+		parent = (void *)parent_bit -
+			 offsetof(struct allowedips_node, bit[node->parent_bit_packed & 1]);
+		free_parent = !rcu_access_pointer(node->bit[0]) &&
+			      !rcu_access_pointer(node->bit[1]) &&
+			      (node->parent_bit_packed & 3) <= 1 &&
+			      !rcu_access_pointer(parent->peer);
+		if (free_parent)
+			child = rcu_dereference_protected(
+					parent->bit[!(node->parent_bit_packed & 1)],
+					lockdep_is_held(lock));
 		call_rcu(&node->rcu, node_free_rcu);
-
-		/* TODO: Note that we currently don't walk up and down in order to
-		 * free any potential filler nodes. This means that this function
-		 * doesn't free up as much as it could, which could be revisited
-		 * at some point.
-		 */
+		if (!free_parent)
+			continue;
+		if (child)
+			child->parent_bit_packed = parent->parent_bit_packed;
+		*(struct allowedips_node **)(parent->parent_bit_packed & ~3UL) = child;
+		call_rcu(&parent->rcu, node_free_rcu);
 	}
 }
...
@@ -19,7 +19,7 @@ struct allowedips_node {
 	u8 bits[16] __aligned(__alignof(u64));
 
 	/* Keep rarely used members at bottom to be beyond cache line. */
-	struct allowedips_node *__rcu *parent_bit;
+	unsigned long parent_bit_packed;
 	union {
 		struct list_head peer_list;
 		struct rcu_head rcu;
@@ -30,7 +30,7 @@ struct allowedips {
 	struct allowedips_node __rcu *root4;
 	struct allowedips_node __rcu *root6;
 	u64 seq;
-};
+} __aligned(4); /* We pack the lower 2 bits of &root, but m68k only gives 16-bit alignment. */
 
 void wg_allowedips_init(struct allowedips *table);
 void wg_allowedips_free(struct allowedips *table, struct mutex *mutex);
...
@@ -12,6 +12,10 @@ ifeq ($(wildcard $(srctree)/include/linux/ptr_ring.h),)
 ccflags-y += -I$(kbuild-dir)/compat/ptr_ring/include
 endif
 
+ifeq ($(wildcard $(srctree)/include/linux/skb_array.h),)
+ccflags-y += -I$(kbuild-dir)/compat/skb_array/include
+endif
+
 ifeq ($(wildcard $(srctree)/include/linux/siphash.h),)
 ccflags-y += -I$(kbuild-dir)/compat/siphash/include
 wireguard-y += compat/siphash/siphash.o
@@ -65,6 +69,10 @@ ifeq ($(wildcard $(srctree)/arch/arm64/include/asm/neon.h)$(CONFIG_ARM64),y)
 ccflags-y += -I$(kbuild-dir)/compat/neon-arm/include
 endif
 
+ifeq ($(wildcard $(srctree)/include/net/dst_metadata.h),)
+ccflags-y += -I$(kbuild-dir)/compat/dstmetadata/include
+endif
+
 ifeq ($(CONFIG_X86_64),y)
 	ifeq ($(ssse3_instr),)
 	ssse3_instr := $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
...
@@ -15,14 +15,11 @@
 #define ISRHEL7
 #elif RHEL_MAJOR == 8
 #define ISRHEL8
-#if RHEL_MINOR >= 4
-#define ISCENTOS8S
-#endif
 #endif
 #endif
 
 /* PaX compatibility */
-#if defined(RAP_PLUGIN)
+#if defined(RAP_PLUGIN) && defined(RAP_ENTRY)
 #undef ENTRY
 #define ENTRY RAP_ENTRY
 #endif
@@ -51,7 +48,7 @@
 #undef pull
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 76) && !defined(ISCENTOS8S)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 76) && !defined(ISRHEL8) && !defined(SYM_FUNC_START)
 #define SYM_FUNC_START ENTRY
 #define SYM_FUNC_END ENDPROC
 #endif
...
@@ -16,15 +16,10 @@
 #define ISRHEL7
 #elif RHEL_MAJOR == 8
 #define ISRHEL8
-#if RHEL_MINOR >= 4
-#define ISCENTOS8S
-#endif
 #endif
 #endif
 #ifdef UTS_UBUNTU_RELEASE_ABI
-#if LINUX_VERSION_CODE == KERNEL_VERSION(3, 13, 11)
-#define ISUBUNTU1404
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
 #define ISUBUNTU1604
 #elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)
 #define ISUBUNTU1804
@@ -219,7 +214,7 @@ static inline void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 #define skb_scrub_packet(a, b) skb_scrub_packet(a)
 #endif
 
-#if ((LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 63) || defined(ISUBUNTU1404)) && !defined(ISRHEL7)
+#if ((LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 63)) && !defined(ISRHEL7)
 #include <linux/random.h>
 static inline u32 __compat_prandom_u32_max(u32 ep_ro)
 {
@@ -268,7 +263,7 @@ static inline u32 __compat_prandom_u32_max(u32 ep_ro)
 #endif
 #endif
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 3) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 35) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 24) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) && !defined(ISUBUNTU1404)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 33) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 60) && !defined(ISRHEL7))
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 3) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 35) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 24) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 33) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 60) && !defined(ISRHEL7))
 static inline void memzero_explicit(void *s, size_t count)
 {
 	memset(s, 0, count);
@@ -281,7 +276,7 @@ static const struct in6_addr __compat_in6addr_any = IN6ADDR_ANY_INIT;
 #define in6addr_any __compat_in6addr_any
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 320))
 #include <linux/completion.h>
 #include <linux/random.h>
 #include <linux/errno.h>
@@ -325,7 +320,7 @@ static inline int wait_for_random_bytes(void)
 }
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) && !defined(ISRHEL8)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 285)) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 320)) && !defined(ISRHEL8)
 #include <linux/random.h>
 #include <linux/slab.h>
 struct rng_is_initialized_callback {
@@ -377,7 +372,7 @@ static inline bool rng_is_initialized(void)
 }
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 320))
 static inline int get_random_bytes_wait(void *buf, int nbytes)
 {
 	int ret = wait_for_random_bytes();
@@ -502,7 +497,7 @@ static inline void *__compat_kvzalloc(size_t size, gfp_t flags)
 #define kvzalloc __compat_kvzalloc
 #endif
 
-#if ((LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 41)) && !defined(ISUBUNTU1404)
+#if ((LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 41))
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
 static inline void __compat_kvfree(const void *addr)
@@ -515,6 +510,28 @@ static inline void __compat_kvfree(const void *addr)
 #define kvfree __compat_kvfree
 #endif
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+static inline void *__compat_kvmalloc_array(size_t n, size_t size, gfp_t flags)
+{
+	if (n != 0 && SIZE_MAX / n < size)
+		return NULL;
+	return kvmalloc(n * size, flags);
+}
+#define kvmalloc_array __compat_kvmalloc_array
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0)
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+static inline void *__compat_kvcalloc(size_t n, size_t size, gfp_t flags)
+{
+	return kvmalloc_array(n, size, flags | __GFP_ZERO);
+}
+#define kvcalloc __compat_kvcalloc
+#endif
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 9)
 #include <linux/netdevice.h>
 #define priv_destructor destructor
@@ -704,7 +721,7 @@ static inline void *skb_put_data(struct sk_buff *skb, const void *data, unsigned
 #endif
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 285)) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 320))
 static inline void le32_to_cpu_array(u32 *buf, unsigned int words)
 {
 	while (words--) {
@@ -757,7 +774,7 @@ static inline void crypto_xor_cpy(u8 *dst, const u8 *src1, const u8 *src2,
 #define hlist_add_behind(a, b) hlist_add_after(b, a)
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 0, 0) && !defined(ISCENTOS8S)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 0, 0) && !defined(ISRHEL8)
 #define totalram_pages() totalram_pages
 #endif
@@ -831,10 +848,16 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb)
 #endif
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 20, 0) && !defined(ISRHEL8)
+#include <net/netlink.h>
+#ifndef NLA_POLICY_EXACT_LEN
 #define NLA_POLICY_EXACT_LEN(_len) { .type = NLA_UNSPEC, .len = _len }
 #endif
+#endif
 #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) && !defined(ISRHEL8)
+#include <net/netlink.h>
+#ifndef NLA_POLICY_MIN_LEN
 #define NLA_POLICY_MIN_LEN(_len) { .type = NLA_UNSPEC, .len = _len }
+#endif
 #define COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY
 #endif
@@ -849,7 +872,7 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb)
 #endif
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 5, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 5, 0) && !defined(ISRHEL8)
 #define genl_dumpit_info(cb) ({ \
 	struct { struct nlattr **attrs; } *a = (void *)((u8 *)cb->args + offsetofend(struct dump_ctx, next_allowedip)); \
 	BUILD_BUG_ON(sizeof(cb->args) < offsetofend(struct dump_ctx, next_allowedip) + sizeof(*a)); \
@@ -869,11 +892,13 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb)
 #endif
 #endif
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 200) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 20, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 249)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 285)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 320))
 #define blake2s_init zinc_blake2s_init
 #define blake2s_init_key zinc_blake2s_init_key
 #define blake2s_update zinc_blake2s_update
 #define blake2s_final zinc_blake2s_final
+#endif
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
 #define blake2s_hmac zinc_blake2s_hmac
 #define chacha20 zinc_chacha20
 #define hchacha20 zinc_hchacha20
@@ -1096,6 +1121,37 @@ static const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tun
 #endif
 #endif
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0)
+#include <net/dst_cache.h>
+struct dst_cache_pcpu {
+	unsigned long refresh_ts;
+	struct dst_entry *dst;
+	u32 cookie;
+	union {
+		struct in_addr in_saddr;
+		struct in6_addr in6_saddr;
+	};
+};
+#define COMPAT_HAS_DEFINED_DST_CACHE_PCPU
+static inline void dst_cache_reset_now(struct dst_cache *dst_cache)
+{
+	int i;
+
+	if (!dst_cache->cache)
+		return;
+
+	dst_cache->reset_ts = jiffies;
+	for_each_possible_cpu(i) {
+		struct dst_cache_pcpu *idst = per_cpu_ptr(dst_cache->cache, i);
+		struct dst_entry *dst = idst->dst;
+
+		idst->cookie = 0;
+		idst->dst = NULL;
+		dst_release(dst);
+	}
+}
+#endif
+
 #if defined(ISUBUNTU1604) || defined(ISRHEL7)
 #include <linux/siphash.h>
 #ifndef _WG_LINUX_SIPHASH_H
@@ -1127,7 +1183,7 @@ static const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tun
 #undef __read_mostly
 #define __read_mostly
 #endif
-#if (defined(RAP_PLUGIN) || defined(CONFIG_CFI_CLANG)) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
+#if (defined(CONFIG_PAX) || defined(CONFIG_CFI_CLANG)) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
 #include <linux/timer.h>
 #define wg_expired_retransmit_handshake(a) wg_expired_retransmit_handshake(unsigned long timer)
 #define wg_expired_send_keepalive(a) wg_expired_send_keepalive(unsigned long timer)
...
@@ -27,6 +27,7 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt)
 #endif
 #include <uapi/linux/in.h>
 
+#ifndef COMPAT_HAS_DEFINED_DST_CACHE_PCPU
 struct dst_cache_pcpu {
 	unsigned long refresh_ts;
 	struct dst_entry *dst;
@@ -36,6 +37,7 @@ struct dst_cache_pcpu {
 		struct in6_addr in6_saddr;
 	};
 };
+#endif
 
 static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
 				      struct dst_entry *dst, u32 cookie)
...
+#ifndef skb_valid_dst
+#define skb_valid_dst(skb) (!!skb_dst(skb))
+#endif
...
@@ -22,9 +22,7 @@ typedef struct {
 } siphash_key_t;
 
 u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key);
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key);
-#endif
 
 u64 siphash_1u64(const u64 a, const siphash_key_t *key);
 u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key);
@@ -77,10 +75,9 @@ static inline u64 ___siphash_aligned(const __le64 *data, size_t len,
 static inline u64 siphash(const void *data, size_t len,
                           const siphash_key_t *key)
 {
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-        if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
+        if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+            !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
                 return __siphash_unaligned(data, len, key);
-#endif
         return ___siphash_aligned(data, len, key);
 }
 
@@ -91,10 +88,8 @@ typedef struct {
 u32 __hsiphash_aligned(const void *data, size_t len,
                        const hsiphash_key_t *key);
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u32 __hsiphash_unaligned(const void *data, size_t len,
                          const hsiphash_key_t *key);
-#endif
 
 u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key);
 u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key);
@@ -130,10 +125,9 @@ static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len,
 static inline u32 hsiphash(const void *data, size_t len,
                            const hsiphash_key_t *key)
 {
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-        if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
+        if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+            !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
                 return __hsiphash_unaligned(data, len, key);
-#endif
         return ___hsiphash_aligned(data, len, key);
 }
......
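The siphash.h hunk above swaps preprocessor guards for IS_ENABLED(), so the unaligned fallback is always declared and the choice happens in ordinary C, where both branches stay visible to the compiler and the untaken one is still type-checked before being eliminated as dead code. As a rough illustration, here is a minimal userspace re-creation of the kernel's IS_ENABLED() preprocessor trick; it assumes the config symbol is either undefined or defined to 1, the way Kconfig-generated headers define =y symbols (module symbols and CONFIG_ prefix handling are omitted):

#include <stdio.h>

#define CONFIG_FOO 1 /* comment this out to flip the result */

/* The kconfig.h trick: if `option` expands to 1, the placeholder macro
 * inserts an extra argument, shifting 1 into the "second argument" slot;
 * otherwise the junk token leaves the default 0 there. */
#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define __is_defined(x) ___is_defined(x)
#define IS_ENABLED(option) __is_defined(option)

int main(void)
{
        /* Both branches compile; the untaken one is optimized away,
         * unlike an #ifdef'd-out region that the compiler never sees. */
        if (IS_ENABLED(CONFIG_FOO))
                puts("CONFIG_FOO enabled");
        else
                puts("CONFIG_FOO disabled");
        return 0;
}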
@@ -57,6 +57,7 @@
         SIPROUND; \
         return (v0 ^ v1) ^ (v2 ^ v3);
 
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
 {
         const u8 *end = data + len - (len % sizeof(u64));
@@ -76,19 +77,19 @@ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
                                           bytemask_from_count(left)));
 #else
         switch (left) {
-        case 7: b |= ((u64)end[6]) << 48;
-        case 6: b |= ((u64)end[5]) << 40;
-        case 5: b |= ((u64)end[4]) << 32;
+        case 7: b |= ((u64)end[6]) << 48; fallthrough;
+        case 6: b |= ((u64)end[5]) << 40; fallthrough;
+        case 5: b |= ((u64)end[4]) << 32; fallthrough;
         case 4: b |= le32_to_cpup(data); break;
-        case 3: b |= ((u64)end[2]) << 16;
+        case 3: b |= ((u64)end[2]) << 16; fallthrough;
         case 2: b |= le16_to_cpup(data); break;
         case 1: b |= end[0];
         }
 #endif
         POSTAMBLE
 }
+#endif
 
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
 {
         const u8 *end = data + len - (len % sizeof(u64));
@@ -108,18 +109,17 @@ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
                                           bytemask_from_count(left)));
 #else
         switch (left) {
-        case 7: b |= ((u64)end[6]) << 48;
-        case 6: b |= ((u64)end[5]) << 40;
-        case 5: b |= ((u64)end[4]) << 32;
+        case 7: b |= ((u64)end[6]) << 48; fallthrough;
+        case 6: b |= ((u64)end[5]) << 40; fallthrough;
+        case 5: b |= ((u64)end[4]) << 32; fallthrough;
         case 4: b |= get_unaligned_le32(end); break;
-        case 3: b |= ((u64)end[2]) << 16;
+        case 3: b |= ((u64)end[2]) << 16; fallthrough;
         case 2: b |= get_unaligned_le16(end); break;
         case 1: b |= end[0];
         }
 #endif
         POSTAMBLE
 }
-#endif
 
 /**
  * siphash_1u64 - compute 64-bit siphash PRF value of a u64
@@ -250,6 +250,7 @@ u64 siphash_3u32(const u32 first, const u32 second, const u32 third,
         HSIPROUND; \
         return (v0 ^ v1) ^ (v2 ^ v3);
 
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
 {
         const u8 *end = data + len - (len % sizeof(u64));
@@ -268,19 +269,19 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
                                           bytemask_from_count(left)));
 #else
         switch (left) {
-        case 7: b |= ((u64)end[6]) << 48;
-        case 6: b |= ((u64)end[5]) << 40;
-        case 5: b |= ((u64)end[4]) << 32;
+        case 7: b |= ((u64)end[6]) << 48; fallthrough;
+        case 6: b |= ((u64)end[5]) << 40; fallthrough;
+        case 5: b |= ((u64)end[4]) << 32; fallthrough;
         case 4: b |= le32_to_cpup(data); break;
-        case 3: b |= ((u64)end[2]) << 16;
+        case 3: b |= ((u64)end[2]) << 16; fallthrough;
         case 2: b |= le16_to_cpup(data); break;
         case 1: b |= end[0];
         }
 #endif
         HPOSTAMBLE
 }
+#endif
 
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u32 __hsiphash_unaligned(const void *data, size_t len,
                          const hsiphash_key_t *key)
 {
@@ -300,18 +301,17 @@ u32 __hsiphash_unaligned(const void *data, size_t len,
                                           bytemask_from_count(left)));
 #else
         switch (left) {
-        case 7: b |= ((u64)end[6]) << 48;
-        case 6: b |= ((u64)end[5]) << 40;
-        case 5: b |= ((u64)end[4]) << 32;
+        case 7: b |= ((u64)end[6]) << 48; fallthrough;
+        case 6: b |= ((u64)end[5]) << 40; fallthrough;
+        case 5: b |= ((u64)end[4]) << 32; fallthrough;
         case 4: b |= get_unaligned_le32(end); break;
-        case 3: b |= ((u64)end[2]) << 16;
+        case 3: b |= ((u64)end[2]) << 16; fallthrough;
         case 2: b |= get_unaligned_le16(end); break;
         case 1: b |= end[0];
         }
 #endif
         HPOSTAMBLE
 }
-#endif
 
 /**
  * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32
@@ -412,6 +412,7 @@ u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
         HSIPROUND; \
         return v1 ^ v3;
 
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
 {
         const u8 *end = data + len - (len % sizeof(u32));
@@ -425,14 +426,14 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
                 v0 ^= m;
         }
         switch (left) {
-        case 3: b |= ((u32)end[2]) << 16;
+        case 3: b |= ((u32)end[2]) << 16; fallthrough;
         case 2: b |= le16_to_cpup(data); break;
         case 1: b |= end[0];
         }
         HPOSTAMBLE
 }
+#endif
 
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u32 __hsiphash_unaligned(const void *data, size_t len,
                          const hsiphash_key_t *key)
 {
@@ -447,13 +448,12 @@ u32 __hsiphash_unaligned(const void *data, size_t len,
                 v0 ^= m;
         }
         switch (left) {
-        case 3: b |= ((u32)end[2]) << 16;
+        case 3: b |= ((u32)end[2]) << 16; fallthrough;
         case 2: b |= get_unaligned_le16(end); break;
         case 1: b |= end[0];
         }
         HPOSTAMBLE
 }
-#endif
 
 /**
  * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
......
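The switch statements above gain the kernel's fallthrough pseudo-keyword, which marks deliberate fall-through so -Wimplicit-fallthrough only flags the accidental kind. The sketch below mirrors the tail-byte gathering switch in userspace; gather_tail() is an invented name, and the memcpy() reads stand in for get_unaligned_le32()/get_unaligned_le16() under the assumption of a little-endian host:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* The kernel's `fallthrough` expands to this statement attribute on
 * compilers that support it; the empty fallback just drops the annotation. */
#ifdef __has_attribute
#if __has_attribute(__fallthrough__)
#define fallthrough __attribute__((__fallthrough__))
#endif
#endif
#ifndef fallthrough
#define fallthrough do {} while (0)
#endif

/* Gather the 0..7 bytes left over after the 8-byte main loop, as in
 * __siphash_unaligned(): cases 7..5 and 3 deliberately fall through while
 * accumulating high bytes; cases 4 and 2 absorb an aligned-width read. */
static uint64_t gather_tail(const uint8_t *end, size_t left)
{
        uint64_t b = 0;
        uint32_t w32;
        uint16_t w16;

        switch (left) {
        case 7: b |= ((uint64_t)end[6]) << 48; fallthrough;
        case 6: b |= ((uint64_t)end[5]) << 40; fallthrough;
        case 5: b |= ((uint64_t)end[4]) << 32; fallthrough;
        case 4: memcpy(&w32, end, 4); b |= w32; break;
        case 3: b |= ((uint64_t)end[2]) << 16; fallthrough;
        case 2: memcpy(&w16, end, 2); b |= w16; break;
        case 1: b |= end[0];
        }
        return b;
}

int main(void)
{
        const uint8_t buf[] = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77 };

        /* Prints 0x0077665544332211 on a little-endian machine. */
        printf("0x%016llx\n", (unsigned long long)gather_tail(buf, sizeof(buf)));
        return 0;
}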
+#ifndef _WG_SKB_ARRAY_H
+#define _WG_SKB_ARRAY_H
+
+#include <linux/skbuff.h>
+
+static void __skb_array_destroy_skb(void *ptr)
+{
+        kfree_skb(ptr);
+}
+
+#endif
@@ -38,9 +38,10 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
         struct socket *sock = NULL;
         struct sockaddr_in udp_addr;
 
-        err = __sock_create(net, AF_INET, SOCK_DGRAM, 0, &sock, 1);
+        err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock);
         if (err < 0)
                 goto error;
+        sk_change_net(sock->sk, net);
 
         udp_addr.sin_family = AF_INET;
         udp_addr.sin_addr = cfg->local_ip;
@@ -72,7 +73,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
 error:
         if (sock) {
                 kernel_sock_shutdown(sock, SHUT_RDWR);
-                sock_release(sock);
+                sk_release_kernel(sock->sk);
         }
         *sockp = NULL;
         return err;
@@ -229,7 +230,7 @@ void udp_tunnel_sock_release(struct socket *sock)
 {
         rcu_assign_sk_user_data(sock->sk, NULL);
         kernel_sock_shutdown(sock, SHUT_RDWR);
-        sock_release(sock);
+        sk_release_kernel(sock->sk);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -254,9 +255,10 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
         int err;
         struct socket *sock = NULL;
 
-        err = __sock_create(net, AF_INET6, SOCK_DGRAM, 0, &sock, 1);
+        err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
         if (err < 0)
                 goto error;
+        sk_change_net(sock->sk, net);
 
         if (cfg->ipv6_v6only) {
                 int val = 1;
@@ -301,7 +303,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
 error:
         if (sock) {
                 kernel_sock_shutdown(sock, SHUT_RDWR);
-                sock_release(sock);
+                sk_release_kernel(sock->sk);
         }
         *sockp = NULL;
         return err;
......
@@ -19,6 +19,7 @@
 #include <linux/if_arp.h>
 #include <linux/icmp.h>
 #include <linux/suspend.h>
+#include <net/dst_metadata.h>
 #include <net/icmp.h>
 #include <net/rtnetlink.h>
 #include <net/ip_tunnels.h>
@@ -106,6 +107,7 @@ static int wg_stop(struct net_device *dev)
 {
         struct wg_device *wg = netdev_priv(dev);
         struct wg_peer *peer;
+        struct sk_buff *skb;
 
         mutex_lock(&wg->device_update_lock);
         list_for_each_entry(peer, &wg->peer_list, peer_list) {
@@ -116,7 +118,9 @@ static int wg_stop(struct net_device *dev)
                 wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
         }
         mutex_unlock(&wg->device_update_lock);
-        skb_queue_purge(&wg->incoming_handshakes);
+        while ((skb = ptr_ring_consume(&wg->handshake_queue.ring)) != NULL)
+                kfree_skb(skb);
+        atomic_set(&wg->handshake_queue_len, 0);
         wg_socket_reinit(wg, NULL, NULL);
         return 0;
 }
@@ -157,7 +161,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
                 goto err_peer;
         }
 
-        mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+        mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
         __skb_queue_head_init(&packets);
         if (!skb_is_gso(skb)) {
@@ -243,14 +247,13 @@ static void wg_destruct(struct net_device *dev)
         destroy_workqueue(wg->handshake_receive_wq);
         destroy_workqueue(wg->handshake_send_wq);
         destroy_workqueue(wg->packet_crypt_wq);
-        wg_packet_queue_free(&wg->decrypt_queue);
-        wg_packet_queue_free(&wg->encrypt_queue);
+        wg_packet_queue_free(&wg->handshake_queue, true);
+        wg_packet_queue_free(&wg->decrypt_queue, false);
+        wg_packet_queue_free(&wg->encrypt_queue, false);
         rcu_barrier(); /* Wait for all the peers to be actually freed. */
         wg_ratelimiter_uninit();
         memzero_explicit(&wg->static_identity, sizeof(wg->static_identity));
-        skb_queue_purge(&wg->incoming_handshakes);
         free_percpu(dev->tstats);
-        free_percpu(wg->incoming_handshakes_worker);
         kvfree(wg->index_hashtable);
         kvfree(wg->peer_hashtable);
         mutex_unlock(&wg->device_update_lock);
@@ -312,7 +315,6 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
         init_rwsem(&wg->static_identity.lock);
         mutex_init(&wg->socket_update_lock);
         mutex_init(&wg->device_update_lock);
-        skb_queue_head_init(&wg->incoming_handshakes);
         wg_allowedips_init(&wg->peer_allowedips);
         wg_cookie_checker_init(&wg->cookie_checker, wg);
         INIT_LIST_HEAD(&wg->peer_list);
@@ -330,16 +332,10 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
         if (!dev->tstats)
                 goto err_free_index_hashtable;
 
-        wg->incoming_handshakes_worker =
-                wg_packet_percpu_multicore_worker_alloc(
-                                wg_packet_handshake_receive_worker, wg);
-        if (!wg->incoming_handshakes_worker)
-                goto err_free_tstats;
-
         wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s",
                         WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name);
         if (!wg->handshake_receive_wq)
-                goto err_free_incoming_handshakes;
+                goto err_free_tstats;
 
         wg->handshake_send_wq = alloc_workqueue("wg-kex-%s",
                         WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name);
@@ -361,10 +357,15 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
         if (ret < 0)
                 goto err_free_encrypt_queue;
 
-        ret = wg_ratelimiter_init();
+        ret = wg_packet_queue_init(&wg->handshake_queue, wg_packet_handshake_receive_worker,
+                                   MAX_QUEUED_INCOMING_HANDSHAKES);
         if (ret < 0)
                 goto err_free_decrypt_queue;
 
+        ret = wg_ratelimiter_init();
+        if (ret < 0)
+                goto err_free_handshake_queue;
+
         ret = register_netdevice(dev);
         if (ret < 0)
                 goto err_uninit_ratelimiter;
@@ -381,18 +382,18 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
 
 err_uninit_ratelimiter:
         wg_ratelimiter_uninit();
+err_free_handshake_queue:
+        wg_packet_queue_free(&wg->handshake_queue, false);
 err_free_decrypt_queue:
-        wg_packet_queue_free(&wg->decrypt_queue);
+        wg_packet_queue_free(&wg->decrypt_queue, false);
 err_free_encrypt_queue:
-        wg_packet_queue_free(&wg->encrypt_queue);
+        wg_packet_queue_free(&wg->encrypt_queue, false);
 err_destroy_packet_crypt:
         destroy_workqueue(wg->packet_crypt_wq);
 err_destroy_handshake_send:
         destroy_workqueue(wg->handshake_send_wq);
 err_destroy_handshake_receive:
         destroy_workqueue(wg->handshake_receive_wq);
-err_free_incoming_handshakes:
-        free_percpu(wg->incoming_handshakes_worker);
 err_free_tstats:
         free_percpu(dev->tstats);
 err_free_index_hashtable:
@@ -412,6 +413,7 @@ static struct rtnl_link_ops link_ops __read_mostly = {
 static void wg_netns_pre_exit(struct net *net)
 {
         struct wg_device *wg;
+        struct wg_peer *peer;
 
         rtnl_lock();
         list_for_each_entry(wg, &device_list, device_list) {
@@ -421,6 +423,8 @@ static void wg_netns_pre_exit(struct net *net)
                         mutex_lock(&wg->device_update_lock);
                         rcu_assign_pointer(wg->creating_net, NULL);
                         wg_socket_reinit(wg, NULL, NULL);
+                        list_for_each_entry(peer, &wg->peer_list, peer_list)
+                                wg_socket_clear_peer_endpoint_src(peer);
                         mutex_unlock(&wg->device_update_lock);
                 }
         }
......
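wg_newlink() above grows one more rung on its goto-unwind ladder for the new handshake queue: each resource acquisition appends one label, and a failure jumps to the label that releases everything acquired so far, in reverse order. A minimal userspace sketch of the same pattern, with invented names and malloc() standing in for the real allocations:

#include <stdio.h>
#include <stdlib.h>

struct ctx {
        void *peers, *queue, *table; /* placeholders for real resources */
};

static int ctx_init(struct ctx *ctx)
{
        ctx->peers = malloc(64);
        if (!ctx->peers)
                goto err;
        ctx->queue = malloc(64);
        if (!ctx->queue)
                goto err_free_peers;
        ctx->table = malloc(64);
        if (!ctx->table)
                goto err_free_queue;
        return 0;

        /* Unwind in strict reverse order of acquisition; adding a resource
         * means adding exactly one label and one jump above. */
err_free_queue:
        free(ctx->queue);
err_free_peers:
        free(ctx->peers);
err:
        return -1;
}

int main(void)
{
        struct ctx ctx;

        if (ctx_init(&ctx) == 0) {
                puts("all resources acquired");
                free(ctx.table);
                free(ctx.queue);
                free(ctx.peers);
        }
        return 0;
}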
...@@ -39,21 +39,18 @@ struct prev_queue { ...@@ -39,21 +39,18 @@ struct prev_queue {
struct wg_device { struct wg_device {
struct net_device *dev; struct net_device *dev;
struct crypt_queue encrypt_queue, decrypt_queue; struct crypt_queue encrypt_queue, decrypt_queue, handshake_queue;
struct sock __rcu *sock4, *sock6; struct sock __rcu *sock4, *sock6;
struct net __rcu *creating_net; struct net __rcu *creating_net;
struct noise_static_identity static_identity; struct noise_static_identity static_identity;
struct workqueue_struct *handshake_receive_wq, *handshake_send_wq; struct workqueue_struct *packet_crypt_wq,*handshake_receive_wq, *handshake_send_wq;
struct workqueue_struct *packet_crypt_wq;
struct sk_buff_head incoming_handshakes;
int incoming_handshake_cpu;
struct multicore_worker __percpu *incoming_handshakes_worker;
struct cookie_checker cookie_checker; struct cookie_checker cookie_checker;
struct pubkey_hashtable *peer_hashtable; struct pubkey_hashtable *peer_hashtable;
struct index_hashtable *index_hashtable; struct index_hashtable *index_hashtable;
struct allowedips peer_allowedips; struct allowedips peer_allowedips;
struct mutex device_update_lock, socket_update_lock; struct mutex device_update_lock, socket_update_lock;
struct list_head device_list, peer_list; struct list_head device_list, peer_list;
atomic_t handshake_queue_len;
unsigned int num_peers, device_update_gen; unsigned int num_peers, device_update_gen;
u32 fwmark; u32 fwmark;
u16 incoming_port; u16 incoming_port;
......
 PACKAGE_NAME="wireguard"
-PACKAGE_VERSION="1.0.20210424"
+PACKAGE_VERSION="1.0.20220627"
 AUTOINSTALL=yes
 BUILT_MODULE_NAME="wireguard"
......
@@ -17,7 +17,7 @@
 #include <linux/genetlink.h>
 #include <net/rtnetlink.h>
 
-static int __init mod_init(void)
+static int __init wg_mod_init(void)
 {
         int ret;
 
@@ -65,7 +65,7 @@ err_allowedips:
         return ret;
 }
 
-static void __exit mod_exit(void)
+static void __exit wg_mod_exit(void)
 {
         wg_genetlink_uninit();
         wg_device_uninit();
@@ -73,8 +73,8 @@ static void __exit mod_exit(void)
         wg_allowedips_slab_uninit();
 }
 
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(wg_mod_init);
+module_exit(wg_mod_exit);
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("WireGuard secure network tunnel");
 MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
......
@@ -4,6 +4,7 @@
  */
 
 #include "queueing.h"
+#include <linux/skb_array.h>
 
 struct multicore_worker __percpu *
 wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr)
@@ -38,11 +39,11 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
         return 0;
 }
 
-void wg_packet_queue_free(struct crypt_queue *queue)
+void wg_packet_queue_free(struct crypt_queue *queue, bool purge)
 {
         free_percpu(queue->worker);
-        WARN_ON(!__ptr_ring_empty(&queue->ring));
-        ptr_ring_cleanup(&queue->ring, NULL);
+        WARN_ON(!purge && !__ptr_ring_empty(&queue->ring));
+        ptr_ring_cleanup(&queue->ring, purge ? __skb_array_destroy_skb : NULL);
 }
 
 #define NEXT(skb) ((skb)->prev)
......
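wg_packet_queue_free() now takes a purge flag: the handshake queue may legitimately still hold skbs at teardown, so leftovers are handed to a destructor (__skb_array_destroy_skb() above), while the encrypt/decrypt queues keep the must-already-be-empty warning. Below is a userspace sketch of that cleanup-with-optional-destructor shape; the ring is a deliberately simplified single-threaded toy and all names are invented:

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

struct ring {
        void **slots;
        size_t size, head, tail; /* monotonic counters, no locking */
};

static int ring_produce(struct ring *r, void *p)
{
        if (r->tail - r->head == r->size)
                return -1; /* full */
        r->slots[r->tail++ % r->size] = p;
        return 0;
}

static void *ring_consume(struct ring *r)
{
        return r->head == r->tail ? NULL : r->slots[r->head++ % r->size];
}

/* Like ptr_ring_cleanup(ring, destroy): leftover entries go to the
 * destructor when one is supplied; with destroy == NULL the ring is
 * required to be empty (the assert stands in for the WARN_ON). */
static void ring_cleanup(struct ring *r, void (*destroy)(void *))
{
        void *p;

        while ((p = ring_consume(r)) != NULL) {
                assert(destroy != NULL);
                destroy(p);
        }
        free(r->slots);
}

int main(void)
{
        struct ring r = { .slots = calloc(4, sizeof(void *)), .size = 4 };

        ring_produce(&r, malloc(32));
        ring_produce(&r, malloc(32));
        ring_cleanup(&r, free); /* purge-style teardown frees what is left */
        puts("purged");
        return 0;
}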
@@ -23,7 +23,7 @@ struct sk_buff;
 /* queueing.c APIs: */
 int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
                          unsigned int len);
-void wg_packet_queue_free(struct crypt_queue *queue);
+void wg_packet_queue_free(struct crypt_queue *queue, bool purge);
 struct multicore_worker __percpu *
 wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);
......
@@ -188,12 +188,12 @@ int wg_ratelimiter_init(void)
                         (1U << 14) / sizeof(struct hlist_head)));
         max_entries = table_size * 8;
 
-        table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL);
+        table_v4 = kvcalloc(table_size, sizeof(*table_v4), GFP_KERNEL);
         if (unlikely(!table_v4))
                 goto err_kmemcache;
 
 #if IS_ENABLED(CONFIG_IPV6)
-        table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL);
+        table_v6 = kvcalloc(table_size, sizeof(*table_v6), GFP_KERNEL);
         if (unlikely(!table_v6)) {
                 kvfree(table_v4);
                 goto err_kmemcache;
......
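The kvzalloc(n * size) to kvcalloc(n, size) change is about integer overflow: the two-argument form checks the multiplication before allocating instead of letting it silently wrap. A small userspace demonstration of the difference, relying on calloc() performing the same overflow check:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        size_t n = SIZE_MAX / 2 + 2; /* n * 8 wraps around to just 8 */
        void *p;

        /* calloc() checks n * size and fails cleanly on overflow. */
        p = calloc(n, 8);
        printf("calloc: %s\n", p ? "allocated!?" : "rejected overflow");
        free(p);

        /* The open-coded product silently wraps, so malloc(n * 8) would
         * hand back a buffer vastly smaller than intended. */
        printf("n * 8 wraps to %zu bytes\n", n * 8);
        return 0;
}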
@@ -117,8 +117,8 @@ static void wg_receive_handshake_packet(struct wg_device *wg,
                 return;
         }
 
-        under_load = skb_queue_len(&wg->incoming_handshakes) >=
-                     MAX_QUEUED_INCOMING_HANDSHAKES / 8;
+        under_load = atomic_read(&wg->handshake_queue_len) >=
+                     MAX_QUEUED_INCOMING_HANDSHAKES / 8;
         if (under_load) {
                 last_under_load = ktime_get_coarse_boottime_ns();
         } else if (last_under_load) {
@@ -213,13 +213,14 @@ static void wg_receive_handshake_packet(struct wg_device *wg,
 
 void wg_packet_handshake_receive_worker(struct work_struct *work)
 {
-        struct wg_device *wg = container_of(work, struct multicore_worker,
-                                            work)->ptr;
+        struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr;
+        struct wg_device *wg = container_of(queue, struct wg_device, handshake_queue);
         struct sk_buff *skb;
 
-        while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) {
+        while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
                 wg_receive_handshake_packet(wg, skb);
                 dev_kfree_skb(skb);
+                atomic_dec(&wg->handshake_queue_len);
                 cond_resched();
         }
 }
@@ -562,22 +563,28 @@ void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb)
         case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION):
         case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE):
         case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): {
-                int cpu;
-
-                if (skb_queue_len(&wg->incoming_handshakes) >
-                            MAX_QUEUED_INCOMING_HANDSHAKES ||
-                    unlikely(!rng_is_initialized())) {
+                int cpu, ret = -EBUSY;
+
+                if (unlikely(!rng_is_initialized()))
+                        goto drop;
+                if (atomic_read(&wg->handshake_queue_len) > MAX_QUEUED_INCOMING_HANDSHAKES / 2) {
+                        if (spin_trylock_bh(&wg->handshake_queue.ring.producer_lock)) {
+                                ret = __ptr_ring_produce(&wg->handshake_queue.ring, skb);
+                                spin_unlock_bh(&wg->handshake_queue.ring.producer_lock);
+                        }
+                } else
+                        ret = ptr_ring_produce_bh(&wg->handshake_queue.ring, skb);
+                if (ret) {
+        drop:
                         net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n",
                                                 wg->dev->name, skb);
                         goto err;
                 }
-                skb_queue_tail(&wg->incoming_handshakes, skb);
-                /* Queues up a call to packet_process_queued_handshake_
-                 * packets(skb):
-                 */
-                cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu);
+                atomic_inc(&wg->handshake_queue_len);
+                cpu = wg_cpumask_next_online(&wg->handshake_queue.last_cpu);
+                /* Queues up a call to packet_process_queued_handshake_packets(skb): */
                 queue_work_on(cpu, wg->handshake_receive_wq,
-                              &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work);
+                              &per_cpu_ptr(wg->handshake_queue.worker, cpu)->work);
                 break;
         }
         case cpu_to_le32(MESSAGE_DATA):
......
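The wg_packet_receive() hunk implements two-tier backpressure for handshakes: below half capacity, producers take the ring's producer lock normally; above it, they only enqueue if spin_trylock_bh() succeeds immediately, so a flood of handshakes can never stall the receive path on a contended lock. Below is a userspace sketch of the same policy, with a pthread mutex standing in for the ptr_ring producer spinlock and all names invented (build with cc -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define CAPACITY 4096

static pthread_mutex_t producer_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int queue_len;

static bool enqueue_locked(void *pkt)
{
        (void)pkt; /* a real ring would link pkt in here */
        return atomic_load(&queue_len) < CAPACITY;
}

/* Returns true if pkt was queued, false if the caller should drop it. */
static bool enqueue_handshake(void *pkt)
{
        bool ok = false;

        if (atomic_load(&queue_len) > CAPACITY / 2) {
                /* Over half full: opportunistic only, never wait. */
                if (pthread_mutex_trylock(&producer_lock) == 0) {
                        ok = enqueue_locked(pkt);
                        pthread_mutex_unlock(&producer_lock);
                }
        } else {
                pthread_mutex_lock(&producer_lock);
                ok = enqueue_locked(pkt);
                pthread_mutex_unlock(&producer_lock);
        }
        if (ok)
                atomic_fetch_add(&queue_len, 1);
        return ok;
}

int main(void)
{
        int pkt;

        printf("queued: %s\n", enqueue_handshake(&pkt) ? "yes" : "no");
        return 0;
}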