libmnl  1.0.4
nfct-daemon.c
1 /* A very simple skeleton code that implements a daemon that collects
2  * conntrack statistics from ctnetlink.
3  *
4  * This example is placed in the public domain.
5  */
6 #include <endian.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <unistd.h>
11 #include <time.h>
12 #include <arpa/inet.h>
13 #include <netinet/in.h>
14 #include <inttypes.h>
15 #include <errno.h>
16 #include <sys/select.h>
17 
18 #include <libmnl/libmnl.h>
19 #include <linux/netlink.h>
20 #include <linux/netfilter/nfnetlink.h>
21 #include <linux/netfilter/nfnetlink_conntrack.h>
22 
23 #include <sys/queue.h>
24 
/*
 * Traffic totals kept for one source address.
 *
 * Objects of this type are linked into the global nstats_head list; data_cb()
 * also uses one on the stack as a scratch area while decoding a message.
 */
struct nstats {
	/* Linkage into the nstats_head list. */
	LIST_ENTRY(nstats) list;

	/*
	 * Address family of the source address: parse_ip() sets it to AF_INET
	 * or AF_INET6. It stays 0 when no source address attribute was found.
	 */
	uint8_t family;

	/* Source address; family tells which member is meaningful. */
	union {
		struct in_addr ip;
		struct in6_addr ip6;
	};

	/* Accumulated packet count. */
	uint64_t pkts;
	/* Accumulated byte count. */
	uint64_t bytes;
};
36 
/*
 * Head of the list holding one struct nstats per source address seen so far.
 * The list starts out empty.
 */
static LIST_HEAD(nstats_head, nstats) nstats_head =
	LIST_HEAD_INITIALIZER(nstats_head);
38 
39 static int parse_counters_cb(const struct nlattr *attr, void *data)
40 {
41  const struct nlattr **tb = data;
42  int type = mnl_attr_get_type(attr);
43 
44  if (mnl_attr_type_valid(attr, CTA_COUNTERS_MAX) < 0)
45  return MNL_CB_OK;
46 
47  switch(type) {
48  case CTA_COUNTERS_PACKETS:
49  case CTA_COUNTERS_BYTES:
50  if (mnl_attr_validate(attr, MNL_TYPE_U64) < 0) {
51  perror("mnl_attr_validate");
52  return MNL_CB_ERROR;
53  }
54  break;
55  }
56  tb[type] = attr;
57  return MNL_CB_OK;
58 }
59 
60 static void parse_counters(const struct nlattr *nest, struct nstats *ns)
61 {
62  struct nlattr *tb[CTA_COUNTERS_MAX+1] = {};
63 
64  mnl_attr_parse_nested(nest, parse_counters_cb, tb);
65  if (tb[CTA_COUNTERS_PACKETS])
66  ns->pkts += be64toh(mnl_attr_get_u64(tb[CTA_COUNTERS_PACKETS]));
67 
68  if (tb[CTA_COUNTERS_BYTES])
69  ns->bytes += be64toh(mnl_attr_get_u64(tb[CTA_COUNTERS_BYTES]));
70 }
71 
72 static int parse_ip_cb(const struct nlattr *attr, void *data)
73 {
74  const struct nlattr **tb = data;
75  int type = mnl_attr_get_type(attr);
76 
77  if (mnl_attr_type_valid(attr, CTA_IP_MAX) < 0)
78  return MNL_CB_OK;
79 
80  switch(type) {
81  case CTA_IP_V4_SRC:
82  case CTA_IP_V4_DST:
83  if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) {
84  perror("mnl_attr_validate");
85  return MNL_CB_ERROR;
86  }
87  break;
88  case CTA_IP_V6_SRC:
89  case CTA_IP_V6_DST:
90  if (mnl_attr_validate2(attr, MNL_TYPE_BINARY,
91  sizeof(struct in6_addr)) < 0) {
92  perror("mnl_attr_validate2");
93  return MNL_CB_ERROR;
94  }
95  break;
96  }
97  tb[type] = attr;
98  return MNL_CB_OK;
99 }
100 
101 static void parse_ip(const struct nlattr *nest, struct nstats *ns)
102 {
103  struct nlattr *tb[CTA_IP_MAX+1] = {};
104 
105  mnl_attr_parse_nested(nest, parse_ip_cb, tb);
106  if (tb[CTA_IP_V4_SRC]) {
107  struct in_addr *in = mnl_attr_get_payload(tb[CTA_IP_V4_SRC]);
108  ns->ip = *in;
109  ns->family = AF_INET;
110  }
111  if (tb[CTA_IP_V6_SRC]) {
112  struct in6_addr *in = mnl_attr_get_payload(tb[CTA_IP_V6_SRC]);
113  ns->ip6 = *in;
114  ns->family = AF_INET6;
115  }
116 }
117 
118 static int parse_tuple_cb(const struct nlattr *attr, void *data)
119 {
120  const struct nlattr **tb = data;
121  int type = mnl_attr_get_type(attr);
122 
123  if (mnl_attr_type_valid(attr, CTA_TUPLE_MAX) < 0)
124  return MNL_CB_OK;
125 
126  switch(type) {
127  case CTA_TUPLE_IP:
128  if (mnl_attr_validate(attr, MNL_TYPE_NESTED) < 0) {
129  perror("mnl_attr_validate");
130  return MNL_CB_ERROR;
131  }
132  break;
133  }
134  tb[type] = attr;
135  return MNL_CB_OK;
136 }
137 
138 static void parse_tuple(const struct nlattr *nest, struct nstats *ns)
139 {
140  struct nlattr *tb[CTA_TUPLE_MAX+1] = {};
141 
142  mnl_attr_parse_nested(nest, parse_tuple_cb, tb);
143  if (tb[CTA_TUPLE_IP])
144  parse_ip(tb[CTA_TUPLE_IP], ns);
145 }
146 
147 static int data_attr_cb(const struct nlattr *attr, void *data)
148 {
149  const struct nlattr **tb = data;
150  int type = mnl_attr_get_type(attr);
151 
152  if (mnl_attr_type_valid(attr, CTA_MAX) < 0)
153  return MNL_CB_OK;
154 
155  switch(type) {
156  case CTA_TUPLE_ORIG:
157  case CTA_COUNTERS_ORIG:
158  case CTA_COUNTERS_REPLY:
159  if (mnl_attr_validate(attr, MNL_TYPE_NESTED) < 0) {
160  perror("mnl_attr_validate");
161  return MNL_CB_ERROR;
162  }
163  break;
164  }
165  tb[type] = attr;
166  return MNL_CB_OK;
167 }
168 
169 static int data_cb(const struct nlmsghdr *nlh, void *data)
170 {
171  struct nlattr *tb[CTA_MAX+1] = {};
172  struct nfgenmsg *nfg = mnl_nlmsg_get_payload(nlh);
173  struct nstats ns = {}, *cur, *new;
174 
175  mnl_attr_parse(nlh, sizeof(*nfg), data_attr_cb, tb);
176  if (tb[CTA_TUPLE_ORIG])
177  parse_tuple(tb[CTA_TUPLE_ORIG], &ns);
178 
179  if (tb[CTA_COUNTERS_ORIG])
180  parse_counters(tb[CTA_COUNTERS_ORIG], &ns);
181 
182  if (tb[CTA_COUNTERS_REPLY])
183  parse_counters(tb[CTA_COUNTERS_REPLY], &ns);
184 
185  /* Look up for existing statistics object ... */
186  LIST_FOREACH(cur, &nstats_head, list) {
187  if (memcmp(&ns.ip6, &cur->ip6, sizeof(struct in6_addr)) == 0) {
188  /* ... and sum counters */
189  cur->pkts += ns.pkts;
190  cur->bytes += ns.bytes;
191  return MNL_CB_OK;
192  }
193  }
194 
195  /* ... if it does not exist, add new stats object */
196  new = calloc(1, sizeof(struct nstats));
197  if (!new)
198  return MNL_CB_OK;
199 
200  new->family = ns.family;
201  new->ip6 = ns.ip6;
202  new->pkts = ns.pkts;
203  new->bytes = ns.bytes;
204 
205  LIST_INSERT_HEAD(&nstats_head, new, list);
206 
207  return MNL_CB_OK;
208 }
209 
210 static int handle(struct mnl_socket *nl)
211 {
212  char buf[MNL_SOCKET_BUFFER_SIZE];
213  int ret;
214 
215  ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
216  if (ret == -1) {
217  /* It only happens if NETLINK_NO_ENOBUFS is not set, it means
218  * we are leaking statistics.
219  */
220  if (errno == ENOBUFS) {
221  fprintf(stderr, "The daemon has hit ENOBUFS, you can "
222  "increase the size of your receiver "
223  "buffer to mitigate this or enable "
224  "reliable delivery.\n");
225  } else {
226  perror("mnl_socket_recvfrom");
227  }
228  return -1;
229  }
230 
231  ret = mnl_cb_run(buf, ret, 0, 0, data_cb, NULL);
232  if (ret == -1) {
233  perror("mnl_cb_run");
234  return -1;
235  } else if (ret <= MNL_CB_STOP)
236  return 0;
237 
238  return 0;
239 }
240 
241 int main(int argc, char *argv[])
242 {
243  struct mnl_socket *nl;
244  char buf[MNL_SOCKET_BUFFER_SIZE];
245  struct nlmsghdr *nlh;
246  struct nfgenmsg *nfh;
247  struct nstats *cur;
248  struct timeval tv = {};
249  int ret, secs, on = 1, buffersize = (1 << 22);
250 
251  if (argc != 2) {
252  printf("Usage: %s <poll-secs>\n", argv[0]);
253  exit(EXIT_FAILURE);
254  }
255  secs = atoi(argv[1]);
256 
257  LIST_INIT(&nstats_head);
258 
259  printf("Polling every %d seconds from kernel...\n", secs);
260 
261  /* Set high priority for this process, less chances to overrun
262  * the netlink receiver buffer since the scheduler gives this process
263  * more chances to run.
264  */
265  nice(-20);
266 
267  /* Open netlink socket to operate with netfilter */
268  nl = mnl_socket_open(NETLINK_NETFILTER);
269  if (nl == NULL) {
270  perror("mnl_socket_open");
271  exit(EXIT_FAILURE);
272  }
273 
274  /* Subscribe to destroy events to avoid leaking counters. The same
275  * socket is used to periodically atomically dump and reset counters.
276  */
277  if (mnl_socket_bind(nl, NF_NETLINK_CONNTRACK_DESTROY,
278  MNL_SOCKET_AUTOPID) < 0) {
279  perror("mnl_socket_bind");
280  exit(EXIT_FAILURE);
281  }
282 
283  /* Set netlink receiver buffer to 16 MBytes, to avoid packet drops */
284  setsockopt(mnl_socket_get_fd(nl), SOL_SOCKET, SO_RCVBUFFORCE,
285  &buffersize, sizeof(socklen_t));
286 
287  /* The two tweaks below enable reliable event delivery, packets may
288  * be dropped if the netlink receiver buffer overruns. This happens ...
289  *
290  * a) if the kernel spams this user-space process until the receiver
291  * is filled.
292  *
293  * or:
294  *
295  * b) if the user-space process does not pull messages from the
296  * receiver buffer so often.
297  */
298  mnl_socket_setsockopt(nl, NETLINK_BROADCAST_ERROR, &on, sizeof(int));
299  mnl_socket_setsockopt(nl, NETLINK_NO_ENOBUFS, &on, sizeof(int));
300 
301  nlh = mnl_nlmsg_put_header(buf);
302  /* Counters are atomically zeroed in each dump */
303  nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) |
304  IPCTNL_MSG_CT_GET_CTRZERO;
305  nlh->nlmsg_flags = NLM_F_REQUEST|NLM_F_DUMP;
306 
307  nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
308  nfh->nfgen_family = AF_INET;
309  nfh->version = NFNETLINK_V0;
310  nfh->res_id = 0;
311 
312  /* Filter by mark: We only want to dump entries whose mark is zero */
313  mnl_attr_put_u32(nlh, CTA_MARK, htonl(0));
314  mnl_attr_put_u32(nlh, CTA_MARK_MASK, htonl(0xffffffff));
315 
316  while (1) {
317  int fd_max = mnl_socket_get_fd(nl);
318  fd_set readfds;
319 
320  /* Every N seconds ... */
321  if (tv.tv_sec == 0 && tv.tv_usec == 0) {
322  /* ... request a fresh dump of the table from kernel */
323  ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
324  if (ret == -1) {
325  perror("mnl_socket_sendto");
326  return -1;
327  }
328  tv.tv_sec = secs;
329  tv.tv_usec = 0;
330 
331  /* print the content of the list */
332  LIST_FOREACH(cur, &nstats_head, list) {
333  char out[INET6_ADDRSTRLEN];
334 
335  if (inet_ntop(cur->family, &cur->ip, out, sizeof(out)))
336  printf("src=%s ", out);
337 
338  printf("counters %"PRIu64" %"PRIu64"\n",
339  cur->pkts, cur->bytes);
340  }
341  }
342 
343  FD_ZERO(&readfds);
344  FD_SET(mnl_socket_get_fd(nl), &readfds);
345 
346  ret = select(fd_max+1, &readfds, NULL, NULL, &tv);
347  if (ret < 0) {
348  if (errno == EINTR)
349  continue;
350 
351  perror("select");
352  exit(EXIT_FAILURE);
353  }
354 
355  /* Handled event and periodic atomic-dump-and-reset messages */
356  if (FD_ISSET(mnl_socket_get_fd(nl), &readfds)) {
357  if (handle(nl) < 0)
358  return EXIT_FAILURE;
359  }
360  }
361 
362  mnl_socket_close(nl);
363 
364  return 0;
365 }