bcc: verifier failure for a xdp code computing udp checksum

I filed this issue based on some old discussions in iovisor-dev mailing list. https://lists.iovisor.org/g/iovisor-dev/topic/30285987

I finally got some time to explore the solution for this and created a standalone example. Just to recap, the following is a simplified program.

-bash-4.4$ cat xdp_simple.c
#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <linux/icmp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/perf_event.h>

#include "bpf_helpers.h"
#include "bpf_endian.h"

/* 0x3FFF mask to check for fragment offset field */
#define IP_FRAGMENTED 65343

// MAC address
typedef unsigned char mac[6];

// Real Server structure (MAC address + IP address)
struct server {
    __be32 ipAddr;
    unsigned char macAddr[ETH_ALEN];
};

// packet structure to log load balancing
struct packet {
    unsigned char dmac[ETH_ALEN];
    unsigned char smac[ETH_ALEN];
    __be32 daddr;
    __be32 saddr;
};

struct {
        __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
        __uint(key_size, sizeof(int));
        __uint(value_size, sizeof(int));
} events SEC(".maps");

// A map which contains port to redirect
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(max_entries, 10);
        __type(key, __be16);
        __type(value, int);
} ports SEC(".maps");

// A map which contains real server 
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(max_entries, 10);
        __type(key, int);
        __type(value, struct server);
} realServers SEC(".maps");
// Virtual IP is accessible via the '0x5' constant

SEC("xdpclient")
int xdp_prog(struct xdp_md *ctx) {
    void *data_end = (void *)(long)ctx->data_end;
    void *data = (void *)(long)ctx->data;

    struct ethhdr * eth = data;
    if (eth + 1 > data_end)
        return XDP_DROP;

    if (eth->h_proto != bpf_htons(ETH_P_IP)){
        return XDP_PASS;
    }

    struct iphdr *iph;
    iph = eth + 1;
    if (iph + 1 > data_end)
        return XDP_DROP;

    if (iph->ihl < 5)
        return XDP_DROP;
    if (iph->ihl != 5) 
        return XDP_PASS;

    if (iph->protocol != IPPROTO_UDP) {
        return XDP_PASS;
    }

    struct udphdr *udp;
    udp = iph + 1;
    if (udp + 1 > data_end)
        return XDP_DROP;
    __u16 udp_len = bpf_ntohs(udp->len);
    if (udp_len < 8 || udp_len >= 512)
        return XDP_DROP;
    udp_len &= 0x1ff;
    if ((void *) udp + udp_len > data_end)
        return XDP_DROP;

    // Update UDP checksum
    __u64 cs = 0;
    udp->check = 0;
    cs = bpf_csum_diff(0, 0, udp, udp_len, cs);
    udp->check = cs;

    // Log packet after
    struct packet pkt = {};
    memcpy(&pkt, data, sizeof(pkt)); // crappy
    pkt.daddr = iph->daddr;
    pkt.saddr = iph->saddr;
    bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &pkt,sizeof(pkt));

    return XDP_TX;
}

char _license[] SEC("license") = "GPL";

The compilation command line looks like

clang -target bpf -O2 -c -I/home/yhs/work/linux/tools/include/uapi -I. -g xdp_simple.c

where the above include path pointing to linux repo for two helper header files.

Using bpftool (linux/tools/bpf/bpftool) to load and can reproduce the verifier issue:

./bpftool -d prog load ./xdp_simple.o /sys/fs/bpf/xdp_example type xdp
45: R0_w=inv1 R1_w=inv0 R2_w=pkt(id=1,off=34,r=34,umax_value=511,var_off=(0x0; 0x1ff)) R3=pkt(id=0,off=34,r=42,imm=0) R4_w=invP(id=0,umax_value=511,var_off=(0x0; 0x1ff)
) R6=ctx(id=0,off=0,imm=0) R7=pkt(id=0,off=0,r=42,imm=0) R10=fp0
; udp->check = 0;
45: (6b) *(u16 *)(r7 +40) = r1
46: R0_w=inv1 R1_w=inv0 R2_w=pkt(id=1,off=34,r=34,umax_value=511,var_off=(0x0; 0x1ff)) R3=pkt(id=0,off=34,r=42,imm=0) R4_w=invP(id=0,umax_value=511,var_off=(0x0; 0x1ff)
) R6=ctx(id=0,off=0,imm=0) R7=pkt(id=0,off=0,r=42,imm=0) R10=fp0
; cs = bpf_csum_diff(0, 0, udp, udp_len, cs);
46: (b7) r1 = 0
47: R0_w=inv1 R1_w=inv0 R2_w=pkt(id=1,off=34,r=34,umax_value=511,var_off=(0x0; 0x1ff)) R3=pkt(id=0,off=34,r=42,imm=0) R4_w=invP(id=0,umax_value=511,var_off=(0x0; 0x1ff)
) R6=ctx(id=0,off=0,imm=0) R7=pkt(id=0,off=0,r=42,imm=0) R10=fp0
47: (b7) r2 = 0
48: R0_w=inv1 R1_w=inv0 R2_w=inv0 R3=pkt(id=0,off=34,r=42,imm=0) R4_w=invP(id=0,umax_value=511,var_off=(0x0; 0x1ff)) R6=ctx(id=0,off=0,imm=0) R7=pkt(id=0,off=0,r=42,imm
=0) R10=fp0
48: (b7) r5 = 0
49: R0_w=inv1 R1_w=inv0 R2_w=inv0 R3=pkt(id=0,off=34,r=42,imm=0) R4_w=invP(id=0,umax_value=511,var_off=(0x0; 0x1ff)) R5_w=inv0 R6=ctx(id=0,off=0,imm=0) R7=pkt(id=0,off=
0,r=42,imm=0) R10=fp0
49: (85) call bpf_csum_diff#28
last_idx 49 first_idx 37
regs=4 stack=0 before 48: (b7) r5 = 0
regs=4 stack=0 before 47: (b7) r2 = 0
invalid access to packet, off=34 size=511, R3(id=0,off=34,r=42)
R3 offset is outside of the packet

About this issue

  • Original URL
  • State: open
  • Created 5 years ago
  • Comments: 18 (2 by maintainers)

Most upvoted comments

Hi @Rayoluo, if you need to compute the checksum from scratch you can use a bounded loop:

#define MAX_UDP_LENGTH 1480

[...]

u32 csum_buffer = 0;
u16 *buf = (void *)udp;

// Compute pseudo-header checksum
csum_buffer += (u16)ip->saddr;
csum_buffer += (u16)(ip->saddr >> 16);
csum_buffer += (u16)ip->daddr;
csum_buffer += (u16)(ip->daddr >> 16);
csum_buffer += (u16)ip->protocol << 8;
csum_buffer += udp->len;

// Compute checksum on udp header + payload
for (int i = 0; i < MAX_UDP_LENGTH; i += 2) {
  if ((void *)(buf + 1) > data_end) {
    break;
  }
  csum_buffer += *buf;
  buf++;
}
if ((void *)buf + 1 <= data_end) {
  // In case payload is not 2 bytes aligned
  csum_buffer += *(u8 *)buf;
}

u16 csum = (u16)csum_buffer + (u16)(csum_buffer >> 16);
csum = ~csum;

Edit: I just figured out the algorithm wasn’t correct, I was performing a simple sum on the packet instead of the one’s complement sum, fixed it.

@FedeParola Thank you for this! I found this issue earlier today and have been trying to get things to work with the original code you provided. I then came back to this issue again and saw you just updated the code five hours ago. Great timing, haha!

The new code you provided works for me when calculating the UDP checksum plus payload from scratch and I made it into a function for anybody interested. I also believe it should work with the TCP checksums as well if you change struct udphdr *udph to struct tcphdr *tcph for example and modify the rest of the function.

#define MAX_UDP_SIZE 1480

...

/* All credit goes to FedeParola from https://github.com/iovisor/bcc/issues/2463 */
__attribute__((__always_inline__))
static inline __u16 caludpcsum(struct iphdr *iph, struct udphdr *udph, void *data_end)
{
    __u32 csum_buffer = 0;
    __u16 *buf = (void *)udph;

    // Compute pseudo-header checksum
    csum_buffer += (__u16)iph->saddr;
    csum_buffer += (__u16)(iph->saddr >> 16);
    csum_buffer += (__u16)iph->daddr;
    csum_buffer += (__u16)(iph->daddr >> 16);
    csum_buffer += (__u16)iph->protocol << 8;
    csum_buffer += udph->len;

    // Compute checksum on udp header + payload
    for (int i = 0; i < MAX_UDP_SIZE; i += 2) 
    {
        if ((void *)(buf + 1) > data_end) 
        {
            break;
        }

        csum_buffer += *buf;
        buf++;
    }

    if ((void *)buf + 1 <= data_end) 
    {
        // In case payload is not 2 bytes aligned
        csum_buffer += *(__u8 *)buf;
    }

    __u16 csum = (__u16)csum_buffer + (__u16)(csum_buffer >> 16);
    csum = ~csum;

    return csum;
}

...

udph->check = 0;
udph->check = caludpcsum(iph, udph, data_end);

My original thinking is a helper to do a whole checksum calculation for a section of code, e.g., udp payload.

I don’t know if there is pertinent use case where you can not use incremental checksum update. (I guess not so much 🤔)

The problem is maybe more a documentation one, as “noob” like me will maybe take time before to understand they should use incremental checksum update instead of a full checksum calculation.