git: f4d3aa749084 - main - netlink: suppress sending NLMSG_ERROR if NLMSG_DONE is already sent

From: Alexander V. Chernikov <melifaro_at_FreeBSD.org>
Date: Wed, 30 Nov 2022 13:25:58 UTC
The branch main has been updated by melifaro:

URL: https://cgit.FreeBSD.org/src/commit/?id=f4d3aa74908496f1f5815caca94ebd86944b17cb

commit f4d3aa74908496f1f5815caca94ebd86944b17cb
Author:     Alexander V. Chernikov <melifaro@FreeBSD.org>
AuthorDate: 2022-11-30 12:15:23 +0000
Commit:     Alexander V. Chernikov <melifaro@FreeBSD.org>
CommitDate: 2022-11-30 13:24:38 +0000

    netlink: suppress sending NLMSG_ERROR if NLMSG_DONE is already sent
    
    Netlink has a confirmation/error reporting mechanism for the sent
    messages. Kernel explicitly acks each messages if requested (NLM_F_ACK)
     or if message processing results in an error.
    Similarly, for multipart messages - typically dumps, where each message
     represents a single object like an interface or a route - another
     message, NLMSG_DONE is used to indicate the end of dump and the
     resulting status.
    As a result, successfull dump ends with both NLMSG_DONE and NLMSG_ERROR
     messages.
    RFC 3549 does not say anything specific about such case.
    Linux adopted an optimisation which suppresses NLMSG_ERROR message
     when NLMSG_DONE is already sent. Certain libraries/applications like
     libnl depends on such behavior.
    
    Suppress sending NLMSG_ERROR if NLMSG_DONE is already sent, by
     setting newly-added 'suppress_ack' flag in the writer and checking
     this flag when generating ack.
    
    This change restores libnl compatibility.
    
    Before:
    ```
    ~ nl-link-list
    Error: Unable to allocate link cache: Message sequence number mismatch
    ````
    
    After:
    ```
    ~ nl-link-list
    vtnet0 ether 52:54:00:14:e3:19 <broadcast,multicast,up,running>
    lo0 ieee1394 <loopback,multicast,up,running>
    ```
    
    Reviewed by:    bapt,pauamma
    Tested by:      bapt
    Differential Revision: https://reviews.freebsd.org/D37565
---
 share/man/man4/netlink.4             |  9 +++++++--
 sys/netlink/netlink_io.c             | 13 ++++++++-----
 sys/netlink/netlink_message_writer.c |  4 ++++
 sys/netlink/netlink_message_writer.h |  1 +
 4 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/share/man/man4/netlink.4 b/share/man/man4/netlink.4
index c75366f560f0..bbfa55049e2e 100644
--- a/share/man/man4/netlink.4
+++ b/share/man/man4/netlink.4
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd November 1, 2022
+.Dd November 30, 2022
 .Dt NETLINK 4
 .Os
 .Sh NAME
@@ -212,11 +212,16 @@ If the
 socket option is not set, the remainder of the original message will follow.
 If the
 .Dv NETLINK_EXT_ACK
-socket option is set, kernel may add a
+socket option is set, the kernel may add a
 .Dv NLMSGERR_ATTR_MSG
 string TLV with the textual error description, optionally followed by the
 .Dv NLMSGERR_ATTR_OFFS
 TLV, indicating the offset from the message start that triggered an error.
+If the operation reply is a multipart message, then no
+.Dv NLMSG_ERROR
+reply is generated, only a
+.Dv NLMSG_DONE
+message, closing multipart sequence.
 .Pp
 .Dv NLMSG_DONE
 indicates the end of the message group: typically, the end of the dump.
diff --git a/sys/netlink/netlink_io.c b/sys/netlink/netlink_io.c
index b2a0023a143b..fb8006f689e4 100644
--- a/sys/netlink/netlink_io.c
+++ b/sys/netlink/netlink_io.c
@@ -405,8 +405,9 @@ nl_receive_message(struct nlmsghdr *hdr, int remaining_length,
 	nl_handler_f handler = nl_handlers[nlp->nl_proto].cb;
 	int error = 0;
 
-	NL_LOG(LOG_DEBUG2, "msg len: %d type: %d", hdr->nlmsg_len,
-	    hdr->nlmsg_type);
+	NLP_LOG(LOG_DEBUG2, nlp, "msg len: %u type: %d: flags: 0x%X seq: %u pid: %u",
+	    hdr->nlmsg_len, hdr->nlmsg_type, hdr->nlmsg_flags, hdr->nlmsg_seq,
+	    hdr->nlmsg_pid);
 
 	if (__predict_false(hdr->nlmsg_len > remaining_length)) {
 		NLP_LOG(LOG_DEBUG, nlp, "message is not entirely present: want %d got %d",
@@ -439,9 +440,10 @@ nl_receive_message(struct nlmsghdr *hdr, int remaining_length,
 		NL_LOG(LOG_DEBUG2, "retcode: %d", error);
 	}
 	if ((hdr->nlmsg_flags & NLM_F_ACK) || (error != 0 && error != EINTR)) {
-		NL_LOG(LOG_DEBUG3, "ack");
-		nlmsg_ack(nlp, error, hdr, npt);
-		NL_LOG(LOG_DEBUG3, "done");
+		if (!npt->nw->suppress_ack) {
+			NL_LOG(LOG_DEBUG3, "ack");
+			nlmsg_ack(nlp, error, hdr, npt);
+		}
 	}
 
 	return (0);
@@ -455,6 +457,7 @@ npt_clear(struct nl_pstate *npt)
 	npt->err_msg = NULL;
 	npt->err_off = 0;
 	npt->hdr = NULL;
+	npt->nw->suppress_ack = false;
 }
 
 /*
diff --git a/sys/netlink/netlink_message_writer.c b/sys/netlink/netlink_message_writer.c
index 1856f2859b01..37414703c6f6 100644
--- a/sys/netlink/netlink_message_writer.c
+++ b/sys/netlink/netlink_message_writer.c
@@ -600,6 +600,9 @@ nlmsg_end(struct nl_writer *nw)
 	}
 
         nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr);
+	NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u",
+	    nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags,
+	    nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid);
         nw->hdr = NULL;
 	nw->num_messages++;
 	return (true);
@@ -681,6 +684,7 @@ nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr)
 	    nw->offset, perror);
 	*perror = error;
 	nlmsg_end(nw);
+	nw->suppress_ack = true;
 
 	return (true);
 }
diff --git a/sys/netlink/netlink_message_writer.h b/sys/netlink/netlink_message_writer.h
index 424983282e59..99f50fb94213 100644
--- a/sys/netlink/netlink_message_writer.h
+++ b/sys/netlink/netlink_message_writer.h
@@ -55,6 +55,7 @@ struct nl_writer {
 	uint8_t			writer_target;	/* NS_WRITER_TARGET_*  */
 	bool			ignore_limit;	/* If true, ignores RCVBUF limit */
 	bool			enomem;		/* True if ENOMEM occured */
+	bool			suppress_ack;	/* If true, don't send NLMSG_ERR */
 };
 #define	NS_WRITER_TARGET_SOCKET	0
 #define	NS_WRITER_TARGET_GROUP	1