(Fully rewritten to simplify the answer. I also fixed quite a few bugs in my C header and source files listed below.)
There was a discussion about exactly this on the linux-netdev mailing list in April 2014, subject "802.1AD packets - Kernel changes ether type from 88A8 to 8100 on all packets".
It turns out that the kernel does not change the ether type, it simply consumes it on receiving a packet. I show below that it is correctly used for VLAN routing (including separate rules for 802.1AD and 802.1Q VLANs), given a recent enough kernel. Even if the VLAN tag is not used for routing (say, if there are no VLANs configured, or if the 8021q kernel module is not loaded), the VLAN tag is consumed by the kernel.
Thus, the original question, "When sending Ethernet frames the ethertype is being re-written", is incorrect: the ethertype is not being re-written. It is consumed by the kernel.
Because the VLAN tag is consumed by the kernel, libpcap -- which is the packet capture library used by tcpdump, wireshark et al. -- tries to reintroduce it back into the packet headers. Unfortunately, it always uses the 802.1Q VLAN header (8100).
There is a suggested change to libpcap that fixes exactly this problem in libpcap, but as of this writing, it does not seem to have been included yet; you can still see htons(ETH_P_8021Q)
hardcoded in several places in the libpcap source file for Linux.
I cannot assume you'll take my word for this, so let me show you how you can ascertain this for yourself.
Let's write a simple packet sender and receiver, that uses the kernel interfaces directly, without the assistance of libpcap.
rawpacket.h:
#ifndef RAWPACKET_H
#define RAWPACKET_H
#include <unistd.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <netpacket/packet.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
static int rawpacket_socket(const int protocol,
const char *const interface,
void *const hwaddr)
{
struct ifreq iface;
struct sockaddr_ll addr;
int socketfd, result;
int ifindex = 0;
if (!interface || !*interface) {
errno = EINVAL;
return -1;
}
socketfd = socket(AF_PACKET, SOCK_RAW, htons(protocol));
if (socketfd == -1)
return -1;
do {
memset(&iface, 0, sizeof iface);
strncpy((char *)&iface.ifr_name, interface, IFNAMSIZ);
result = ioctl(socketfd, SIOCGIFINDEX, &iface);
if (result == -1)
break;
ifindex = iface.ifr_ifindex;
memset(&iface, 0, sizeof iface);
strncpy((char *)&iface.ifr_name, interface, IFNAMSIZ);
result = ioctl(socketfd, SIOCGIFFLAGS, &iface);
if (result == -1)
break;
iface.ifr_flags |= IFF_PROMISC;
result = ioctl(socketfd, SIOCSIFFLAGS, &iface);
if (result == -1)
break;
memset(&iface, 0, sizeof iface);
strncpy((char *)&iface.ifr_name, interface, IFNAMSIZ);
result = ioctl(socketfd, SIOCGIFHWADDR, &iface);
if (result == -1)
break;
memset(&addr, 0, sizeof addr);
addr.sll_family = AF_PACKET;
addr.sll_protocol = htons(protocol);
addr.sll_ifindex = ifindex;
addr.sll_hatype = 0;
addr.sll_pkttype = 0;
addr.sll_halen = ETH_ALEN; /* Assume ethernet! */
memcpy(&addr.sll_addr, &iface.ifr_hwaddr.sa_data, addr.sll_halen);
if (hwaddr)
memcpy(hwaddr, &iface.ifr_hwaddr.sa_data, ETH_ALEN);
if (bind(socketfd, (struct sockaddr *)&addr, sizeof addr))
break;
errno = 0;
return socketfd;
} while (0);
{
const int saved_errno = errno;
close(socketfd);
errno = saved_errno;
return -1;
}
}
static unsigned int tci(const unsigned int priority,
const unsigned int drop,
const unsigned int vlan)
{
return (vlan & 0xFFFU)
| ((!!drop) << 12U)
| ((priority & 7U) << 13U);
}
static size_t rawpacket_qinq(unsigned char *const buffer, size_t const length,
const unsigned char *const srcaddr,
const unsigned char *const dstaddr,
const unsigned int service_tci,
const unsigned int customer_tci,
const unsigned int ethertype)
{
unsigned char *ptr = buffer;
uint32_t tag;
uint16_t type;
if (length < 2 * ETH_ALEN + 4 + 4 + 2) {
errno = ENOSPC;
return (size_t)0;
}
memcpy(ptr, dstaddr, ETH_ALEN);
ptr += ETH_ALEN;
memcpy(ptr, srcaddr, ETH_ALEN);
ptr += ETH_ALEN;
/* Service 802.1AD tag. */
tag = htonl( ((uint32_t)(ETH_P_8021AD) << 16U)
| ((uint32_t)service_tci & 0xFFFFU) );
memcpy(ptr, &tag, 4);
ptr += 4;
/* Client 802.1Q tag. */
tag = htonl( ((uint32_t)(ETH_P_8021Q) << 16U)
| ((uint32_t)customer_tci & 0xFFFFU) );
memcpy(ptr, &tag, 4);
ptr += 4;
/* Ethertype tag. */
type = htons((uint16_t)ethertype);
memcpy(ptr, &type, 2);
ptr += 2;
return (size_t)(ptr - buffer);
}
#endif /* RAWPACKET_H */
sender.c:
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include "rawpacket.h"
static size_t parse_data(unsigned char *const data, const size_t size,
const char *const string)
{
char *ends = strncpy((char *)data, string, size);
return (size_t)(ends - (char *)data);
}
static int parse_hwaddr(const char *const string,
void *const hwaddr)
{
unsigned int addr[6];
char dummy;
if (sscanf(string, " %02x:%02x:%02x:%02x:%02x:%02x %c",
&addr[0], &addr[1], &addr[2],
&addr[3], &addr[4], &addr[5],
&dummy) == 6 ||
sscanf(string, " %02x%02x%02x%02x%02x%02x %c",
&addr[0], &addr[1], &addr[2],
&addr[3], &addr[4], &addr[5],
&dummy) == 6) {
if (hwaddr) {
((unsigned char *)hwaddr)[0] = addr[0];
((unsigned char *)hwaddr)[1] = addr[1];
((unsigned char *)hwaddr)[2] = addr[2];
((unsigned char *)hwaddr)[3] = addr[3];
((unsigned char *)hwaddr)[4] = addr[4];
((unsigned char *)hwaddr)[5] = addr[5];
}
return 0;
}
errno = EINVAL;
return -1;
}
int main(int argc, char *argv[])
{
unsigned char packet[ETH_FRAME_LEN + ETH_FCS_LEN];
unsigned char srcaddr[6], dstaddr[6];
int socketfd;
size_t size, i;
ssize_t n;
if (argc < 3 || argc > 4 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
fprintf(stderr, " %s interface hwaddr [message]\n", argv[0]);
fprintf(stderr, "\n");
return 1;
}
if (parse_hwaddr(argv[2], &dstaddr)) {
fprintf(stderr, "%s: Invalid destination hardware address.\n", argv[2]);
return 1;
}
socketfd = rawpacket_socket(ETH_P_ALL, argv[1], &srcaddr);
if (socketfd == -1) {
fprintf(stderr, "%s: %s.\n", argv[1], strerror(errno));
return 1;
}
memset(packet, 0, sizeof packet);
/* Construct a QinQ header for a fake Ethernet packet type. */
size = rawpacket_qinq(packet, sizeof packet, srcaddr, dstaddr,
tci(7, 0, 1U), tci(7, 0, 2U),
ETH_P_IP);
if (!size) {
fprintf(stderr, "Failed to construct QinQ headers: %s.\n", strerror(errno));
close(socketfd);
return 1;
}
/* Add packet payload. */
if (argc > 3)
size += parse_data(packet + size, sizeof packet - size, argv[3]);
else
size += parse_data(packet + size, sizeof packet - size, "Hello!");
/* Pad with zeroes to minimum 64 octet length. */
if (size < 64)
size = 64;
/* Send it. */
n = send(socketfd, packet, size, 0);
if (n == -1) {
fprintf(stderr, "Failed to send packet: %s.\n", strerror(errno));
shutdown(socketfd, SHUT_RDWR);
close(socketfd);
return 1;
}
fprintf(stderr, "Sent %ld bytes:", (long)n);
for (i = 0; i < size; i++)
fprintf(stderr, " %02x", packet[i]);
fprintf(stderr, "\n");
fflush(stderr);
shutdown(socketfd, SHUT_RDWR);
if (close(socketfd)) {
fprintf(stderr, "Error closing socket: %s.\n", strerror(errno));
return 1;
}
return 0;
}
receiver.c:
#include <sys/types.h>
#include <sys/socket.h>
#include <string.h>
#include <signal.h>
#include <errno.h>
#include <stdio.h>
#include "rawpacket.h"
static volatile sig_atomic_t done = 0;
static void handle_done(int signum)
{
done = signum;
}
static int install_done(const int signum)
{
struct sigaction act;
sigemptyset(&act.sa_mask);
act.sa_handler = handle_done;
act.sa_flags = 0;
if (sigaction(signum, &act, NULL))
return errno;
return 0;
}
static const char *protocol_name(const unsigned int protocol)
{
static char buffer[16];
switch (protocol & 0xFFFFU) {
case 0x0001: return "ETH_P_802_3";
case 0x0002: return "ETH_P_AX25";
case 0x0003: return "ETH_P_ALL";
case 0x0060: return "ETH_P_LOOP";
case 0x0800: return "ETH_P_IP";
case 0x0806: return "ETH_P_ARP";
case 0x8100: return "ETH_P_8021Q (802.1Q VLAN)";
case 0x88A8: return "ETH_P_8021AD (802.1AD VLAN)";
default:
snprintf(buffer, sizeof buffer, "0x%04x", protocol & 0xFFFFU);
return (const char *)buffer;
}
}
static const char *header_type(const unsigned int hatype)
{
static char buffer[16];
switch (hatype) {
case 1: return "ARPHRD_ETHER: Ethernet 10Mbps";
case 2: return "ARPHRD_EETHER: Experimental Ethernet";
case 768: return "ARPHRD_TUNNEL: IP Tunnel";
case 772: return "ARPHRD_LOOP: Loopback";
default:
snprintf(buffer, sizeof buffer, "0x%04x", hatype);
return buffer;
}
}
static const char *packet_type(const unsigned int pkttype)
{
static char buffer[16];
switch (pkttype) {
case PACKET_HOST: return "PACKET_HOST";
case PACKET_BROADCAST: return "PACKET_BROADCAST";
case PACKET_MULTICAST: return "PACKET_MULTICAST";
case PACKET_OTHERHOST: return "PACKET_OTHERHOST";
case PACKET_OUTGOING: return "PACKET_OUTGOING";
default:
snprintf(buffer, sizeof buffer, "0x%02x", pkttype);
return (const char *)buffer;
}
}
static void fhex(FILE *const out,
const char *const before,
const char *const after,
const void *const src, const size_t len)
{
const unsigned char *const data = src;
size_t i;
if (len < 1)
return;
if (before)
fputs(before, out);
for (i = 0; i < len; i++)
fprintf(out, " %02x", data[i]);
if (after)
fputs(after, out);
}
int main(int argc, char *argv[])
{
struct sockaddr_ll addr;
socklen_t addrlen;
unsigned char data[2048];
ssize_t n;
int socketfd, flag;
if (argc != 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
fprintf(stderr, " %s interface\n", argv[0]);
fprintf(stderr, "\n");
return 1;
}
if (install_done(SIGINT) ||
install_done(SIGHUP) ||
install_done(SIGTERM)) {
fprintf(stderr, "Cannot install signal handlers: %s.\n", strerror(errno));
return 1;
}
socketfd = rawpacket_socket(ETH_P_ALL, argv[1], NULL);
if (socketfd == -1) {
fprintf(stderr, "%s: %s.\n", argv[1], strerror(errno));
return 1;
}
flag = 1;
if (setsockopt(socketfd, SOL_SOCKET, SO_REUSEADDR, &flag, sizeof flag)) {
fprintf(stderr, "Cannot set REUSEADDR socket option: %s.\n", strerror(errno));
close(socketfd);
return 1;
}
if (setsockopt(socketfd, SOL_SOCKET, SO_BINDTODEVICE, argv[1], strlen(argv[1]) + 1)) {
fprintf(stderr, "Cannot bind to device %s: %s.\n", argv[1], strerror(errno));
close(socketfd);
return 1;
}
while (!done) {
memset(data, 0, sizeof data);
memset(&addr, 0, sizeof addr);
addrlen = sizeof addr;
n = recvfrom(socketfd, &data, sizeof data, 0,
(struct sockaddr *)&addr, &addrlen);
if (n == -1) {
if (errno == EINTR)
continue;
fprintf(stderr, "Receive error: %s.\n", strerror(errno));
break;
}
printf("Received %d bytes:\n", (int)n);
printf("\t Protocol: %s\n", protocol_name(htons(addr.sll_protocol)));
printf("\t Interface: %d\n", (int)addr.sll_ifindex);
printf("\t Header type: %s\n", header_type(addr.sll_hatype));
printf("\t Packet type: %s\n", packet_type(addr.sll_pkttype));
fhex(stdout, "\t Address:", "\n", addr.sll_addr, addr.sll_halen);
fhex(stdout, "\t Data:", "\n", data, n);
printf("\n");
fflush(stdout);
}
shutdown(socketfd, SHUT_RDWR);
close(socketfd);
return 0;
}
To compile, you can use
gcc -O2 receiver.c -o receiver
gcc -O2 sender.c -o sender
Run without parameters, or with -h
, to see the usage for either one. sender
sends only one packet. receiver
listens on the specified interface (in promiscuous mode), until you interrupt it (Ctrl+C) or send it a TERM
signal.
Start receiver in one virtual terminal on the loopback interface:
sudo ./receiver lo
In another virtual terminal on the same machine, running
sudo ./sender lo FF:FF:FF:FF:FF:FF '_The contents of a 64-byte Ethernet frame_'
will output (newlines and indentation added for ease of understanding)
Sent 64 bytes: ff ff ff ff ff ff
00 00 00 00 00 00
88 a8 e0 01
81 00 e0 02
08 00
5f 54 68 65 20 63 6f 6e
74 65 6e 74 73 20 6f 66
20 61 20 36 34 2d 62 79
74 65 20 45 74 68 65 72
6e 65 74 20 66 72 61 6d
65 5f
In the receiver terminal, however, we see (newlines and indentation added):
Received 64 bytes:
Protocol: ETH_P_ALL
Interface: 1
Header type: ATPHRD_LOOP: Loopback
Packet type: PACKET_OUTGOING
Address: 00 00 00 00 00 00
Data: ff ff ff ff ff ff
00 00 00 00 00 00
88 a8 e0 01
81 00 e0 02
08 00
5f 54 68 65 20 63 6f 6e
74 65 6e 74 73 20 6f 66
20 61 20 36 34 2d 62 79
74 65 20 45 74 68 65 72
6e 65 74 20 66 72 61 6d
65 5f
Received 60 bytes:
Protocol: ETH_P_8021Q (802.1Q VLAN)
Interface: 1
Header type: ATPHRD_LOOP: Loopback
Packet type: PACKET_MULTICAST
Address: 00 00 00 00 00 00
Data: ff ff ff ff ff ff
00 00 00 00 00 00
81 00 e0 02
08 00
5f 54 68 65 20 63 6f 6e
74 65 6e 74 73 20 6f 66
20 61 20 36 34 2d 62 79
74 65 20 45 74 68 65 72
6e 65 74 20 66 72 61 6d
65 5f
The first one, PACKET_OUTGOING, was captured as outgoing; it shows that the kernel did not consume any headers when the packet was sent.
The second one, PACKET_MULTICAST, was captured as it arrived. (Since the Ethernet address was FF:FF:FF:FF:FF:FF, it is a multicast packet.)
As you can see, the latter packet has only the 802.1Q VLAN header -- the client VLAN --, the kernel having consumed the 802.1AD service VLAN tag.
The above confirms the scenario for the loopback interface, at least. Using the raw packet interface, the kernel consumes the 802.1AD VLAN header (the one immediately following the recipient address). If you use tcpdump -i eth0
alongside the receiver, you can see that libpcap is reinserting the consumed header back to the packet!
Loopback interface is a bit special, so let's redo the test using virtual machines. I happen to be running up-to-date Xubuntu 14.04 (all updates installed as of 2014-06-28, Ubuntu 3.13.0-29-generic #53 x86_64 kernel). Sender HW address is 08 00 00 00 00 02, receivers is 08 00 00 00 00 01, and the two are connected to an internal network without anybody else present.
(Again, I add newlines and indentation to the output to make it easier to read.)
Sender, on virtual machine 2:
sudo ./sender eth0 08:00:00:00:00:01 '_The contents of a 64-byte Ethernet frame_'
Sent 64 bytes: 08 00 00 00 00 01
08 00 00 00 00 02
88 a8 e0 01
81 00 e0 02
08 00
5f 54 68 65 20 63 6f 6e
74 65 6e 74 73 20 6f 66
20 61 20 36 34 2d 62 79
74 65 20 45 74 68 65 72
6e 65 74 20 66 72 61 6d
65 5f
Receiver, on virtual machine 1:
sudo ./receiver eth0
Received 60 bytes:
Protocol: ETH_P_8021Q (802.1Q VLAN)
Interface: 2
Header type: ARPHRD_ETHER: Ethernet 10Mbps
Packet type: PACKET_HOST
Address: 08 00 00 00 00 02
Data: 08 00 00 00 00 01
08 00 00 00 00 02
81 00 e0 02
08 00
5f 54 68 65 20 63 6f 6e
74 65 6e 74 73 20 6f 66
20 61 20 36 34 2d 62 79
74 65 20 45 74 68 65 72
6e 65 74 20 66 72 61 6d
65 5f
As you can see, the results are basically the same as for the loopback case. In particular, the 802.1AD service VLAN tag was consumed on receive. (You can use tcpdump or wireshark, to compare the received packets: libpcap is obviously reinserting the consumed VLAN tag pack into the packet.)
If you have recent enough kernel (support was added in April 2013), then you can configure a 802.1AD VLAN(s) on the recipient using:
sudo modprobe 8021q
sudo ip link add link eth0 eth0.service1 type vlan proto 802.1ad id 1
Receiving on eth0
will receive all packets, but on eth0.service1
only those packets with an 802.1AD VLAN tag, with VLAN ID 1. It does not capture frames with 802.1Q VLAN tags with the same VLAN ID, meaning that you can do full routing on receive for both 802.1AD and 802.1Q VLANs.
I didn't trust just the above test, myself; I created a number of 802.1AD and 802.1Q VLANs, with separate receive
instances on each one, and changed the packet headers (not only service (first) tci()
and client (second) tci()
in the rawpacket_qinq()
call in sender.c to change the service and client VLAN IDs, but also changing rawpacket.h, to verify that 802.1AD (88a8) and 802.1Q (8100) VLAN headers are routed correctly on receive). It all worked beautifully, without any hiccups.
In summary:
Given a recent enough Linux kernel version, Ethernet frames are correctly routed by the Linux kernel (by the 8021q module) on receive, including separate VLAN interfaces for 802.1AD and 802.1Q with the same VLAN IDs. The kernel does consume the VLAN header used for routing, even if no VLANs are configured.
Questions?
ethtool
to make sure any VLAN offloading is disabled on your NIC(s). Also, how are you openingsockFD
? – ObeliasockFD = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
– Dysarttx-vlan-offloading
andrx-vlan-offloading
usingethtool
on both machines hasn't change the behaviour but that has given my some food for thought. I usedsudo ethtool -K eth0 rxvlan off && sudo ethtool -K eth0 txvlan off
- I will have a further poke around ethtool though and see what I can find. – Dysart