高性能の Ethernet スイッチが 1000 円くらいで買える時代になっているが,学習のようすを観察するなどの実験用としてはプログラマブルなスイッチがほしいこともある. Ethernet の標準にきっちり対応しているわけではないが,学習機能をもち,かんたんなスイッチングができるプログラムをつくったので,ここに掲載する.
つかいかたはつぎのとおり.
./ethernode <FirstInterface> <LastInterface>
または
./ethernode <FirstInterface> <LastInterface> <PrintLevel>
スイッチとしてつかうので,すくなくとも 3 個のネットワーク・インタフェースを搭載している必要がある. 指定した範囲が 1..3 なら,それらのインタフェースは eth1, eth2, eth3 である. eth0 は制御用に確保しておくのがよい. PrintLevel はパケット転送,学習,パケット内容などに関するメッセージを制御する. 0 ならなにも出力しないが,1 から 4 のときにメッセージを出力する. (詳細はプログラムを参照.)
/***
*
* Software-based Ethernet Switch
*
* Coded by Yasusi Kanada
* Ver 0.1 2011-1-11 Initial version
* Ver 1.0 2011-1-16 48-bit ID split
* Ver 1.1 2011-2-23 Learning/traffic statistics function introduced
* Ver 1.11 2011-2-24 Debugged and main-loop optimized
* Ver 1.12 2012-5-20 Cleaned
*
***/
#include "Ether.h"
#include <arpa/inet.h>
#include <fcntl.h>
#include <stdlib.h>
#include <sys/time.h>
// Minimal boolean support (this file does not include <stdbool.h>).
#define bool int32_t
#define true 1
#define false 0
// #define DEBUG 0
// Size of the receive buffers, in bytes.
#define MAX_PACKET_SIZE 2048
// Sufficiently larger than the MTU
// Timeout time in usec
// LearningTimeout: age after which forward() overwrites the interface and
// timestamp of an already-learned source address.
#define LearningTimeout 15000000
// ReferenceTimeout: age after which forward() no longer trusts a learned
// destination entry and floods the packet instead.
#define ReferenceTimeout 60000000
// Number of elements in fd[] and ifindex[]; valid indices are
// 0 .. MaxInterfaces - 1.
#define MaxInterfaces 5
// Inclusive range of interface numbers (ethN) handled by the switch;
// overridden by the first two command-line arguments in main().
int32_t FirstInterface = 1;
int32_t LastInterface = 3;
// Per-interface state, indexed by interface number (the N of ethN):
// fd[] holds the raw socket descriptor, ifindex[] the kernel interface index
// returned by SIOCGIFINDEX.
int32_t fd[MaxInterfaces];
int32_t ifindex[MaxInterfaces];
// Verbosity of the messages written to stdout; set from the third
// command-line argument in main().
int32_t displayLevel = 0;
/* displayLevel = 0: no display mode
displayLevel = 1: statistics-only mode
displayLevel = 2: learning-table dump mode
displayLevel = 3: debug mode
displayLevel = 4: louder debug mode
*/
// Statistics shown by the periodic display in main():
// numLearned counts newly learned source addresses; numInput and numOutput
// are printed as the RX and TX packet counts.
int32_t numLearned = 0;
int32_t numInput = 0;
int32_t numOutput = 0;
// Local prototypes for functions whose headers (<unistd.h>, <time.h>) are
// not included by this file.
extern void _exit(int32_t);
time_t time(time_t *timer);
/**
 * Print "<ifname> - <what>: <errno text>" to stderr.
 *
 * stderr is used for the whole message because the callers may terminate
 * with _exit(), which does not flush stdio buffers; text written to stdout
 * with printf() could be lost.
 */
static void open_socket_report(const char *ifname, const char *what) {
    fprintf(stderr, "%s - ", ifname);
    perror(what);
}

/**
 * Open a socket for the network interface
 *
 * Creates a raw packet socket receiving all protocols, binds it to the
 * interface named "eth<index>", tries to switch the interface to promiscuous
 * mode, and discards every packet queued on the socket before returning.
 *
 * @param index     interface number; the interface name is "eth<index>"
 * @param rifindex  receives the kernel interface index of the interface
 * @return          the socket descriptor
 *
 * Failure of socket(), SIOCGIFINDEX or bind() terminates the process with
 * exit status 1. Failure to enable promiscuous mode is reported on stderr
 * but is not fatal.
 */
int32_t open_socket(int32_t index, int32_t *rifindex) {
    unsigned char buf[MAX_PACKET_SIZE];
    char interface[IFNAMSIZ];
    struct ifreq ifr;
    struct sockaddr_ll sll;
    int32_t ready;

    snprintf(interface, sizeof(interface), "eth%d", index);

    int32_t sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (sock == -1) {
        open_socket_report(interface, "socket");
        _exit(1);
    }

    // get interface index
    memset(&ifr, 0, sizeof(ifr));
    strncpy(ifr.ifr_name, interface, IFNAMSIZ - 1);
    if (ioctl(sock, SIOCGIFINDEX, &ifr) == -1) {
        open_socket_report(interface, "SIOCGIFINDEX");
        _exit(1);
    }
    int32_t kernelIndex = ifr.ifr_ifindex;
    *rifindex = kernelIndex;

    // set promiscuous mode
    memset(&ifr, 0, sizeof(ifr));
    strncpy(ifr.ifr_name, interface, IFNAMSIZ - 1);
    if (ioctl(sock, SIOCGIFFLAGS, &ifr) == -1) {
        // Do not write flags back when they could not be read: the zeroed
        // ifr would clear every other flag of the interface (e.g. IFF_UP).
        open_socket_report(interface, "SIOCGIFFLAGS");
    } else {
        ifr.ifr_flags |= IFF_PROMISC;
        if (ioctl(sock, SIOCSIFFLAGS, &ifr) == -1) {
            open_socket_report(interface, "SIOCSIFFLAGS");
        }
    }

    // bind the socket to the interface
    memset(&sll, 0, sizeof(sll));
    sll.sll_family = AF_PACKET;
    sll.sll_protocol = htons(ETH_P_ALL);
    sll.sll_ifindex = kernelIndex;
    if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) == -1) {
        open_socket_report(interface, "bind");
        _exit(1);
    }

    /* flush all received packets.
     *
     * raw-socket receives packets from all interfaces
     * when the socket is not bound to an interface
     */
    do {
        fd_set fds;
        struct timeval t;
        FD_ZERO(&fds);
        FD_SET(sock, &fds);
        memset(&t, 0, sizeof(t)); // zero timeout: poll without waiting
        ready = select(sock + 1, &fds, NULL, NULL, &t);
        if (ready > 0) {
            ssize_t size = recv(sock, buf, MAX_PACKET_SIZE, 0);
            if (size < 0) {
                break; // persistent receive error: stop flushing
            }
            if (displayLevel == 4 && size > 0) {
                printf("interface %d flushed\n", kernelIndex);
            }
        }
    } while (ready > 0); // stop on "nothing pending" (0) and on error (-1)

    if (displayLevel >= 4) {
        printf("%s opened (fd=%d interface=%d)\n", interface, sock, kernelIndex);
    }
    return sock;
}
/**
 * Print the source and destination MAC addresses of a packet.
 *
 * Each address is shown as two hexadecimal parts (16-bit part, then 32-bit
 * part), converted from network to host byte order. The line is prefixed
 * with `message`. `packetSize` is accepted but not used.
 */
void printPacket(EtherPacket *packet, ssize_t packetSize, char *message) {
    (void)packetSize;

    unsigned int srcHigh = ntohs(packet->srcMAC1);
    unsigned int srcLow = ntohl(packet->srcMAC2);
    unsigned int dstHigh = ntohs(packet->destMAC1);
    unsigned int dstLow = ntohl(packet->destMAC2);

    printf("%s from x%1x:x%1x to x%1x:x%1x\n",
           message, srcHigh, srcLow, dstHigh, dstLow);
}
/**
* Forwarding-table Handler
*
* The table maps a learned source MAC address to the interface number it
* was seen on, together with the time it was learned or last refreshed.
*/
/* Forwarding table entry data structure */
struct ForwardingTable {
time_t tv_sec; // timestamp 'sec'-part
suseconds_t tv_usec; // timestamp 'usec'-part
// MAC address split in two parts, both in host byte order.
// idh == 0 && idl == 0 marks an unused entry (see getfwdtbl).
uint32_t idh; // upper 16 bits of the MAC address (from srcMAC1)
uint32_t idl; // lower 32 bits of the MAC address (from srcMAC2)
int32_t ifnum; // interface number (N of ethN) the address was learned on
unsigned short age; // printed by dumpfwdtbl; never assigned in this file
} __attribute__((packed));
// Number of table slots. Slot 0 is not used by the lookup, so at most
// FWDTBLSIZE - 1 addresses can be stored.
#define FWDTBLSIZE 1000
// The forwarding table itself (zero-initialized as a file-scope object).
struct ForwardingTable fwdtbl[FWDTBLSIZE];
/* Initialize forwarding table */
void initfwdtbl() {
int32_t i;
for (i = 0; i < FWDTBLSIZE; i++) {
fwdtbl[i].idh = 0;
fwdtbl[i].idl = 0;
}
}
/* Dump a forwarding table entry */
void dumpfwdtbl(int32_t index, char *message) {
printf("%s index=%d id=x%04x%08x ifnum=eth%d age=%d timestamp=%d.%d\n",
message, index, (uint16_t)fwdtbl[index].idh,
(uint32_t)fwdtbl[index].idl, fwdtbl[index].ifnum,
fwdtbl[index].age, (int32_t)fwdtbl[index].tv_sec, (int32_t)fwdtbl[index].tv_usec);
}
/* Snapshot the forwarding table: print every learned address with its slot
 * number and interface, starting at slot 1 and stopping at the first unused
 * slot. */
void snapfwdtbl() {
    int32_t slot = 1;

    printf(" Learned addresses:\n");
    while (slot < FWDTBLSIZE &&
           (fwdtbl[slot].idh != 0 || fwdtbl[slot].idl != 0)) {
        printf(" %2d %04x%08x eth%d\n",
               slot, fwdtbl[slot].idh, fwdtbl[slot].idl, fwdtbl[slot].ifnum);
        slot++;
    }
}
/* Search the forwarding table (linear search used currently) */
int32_t getfwdtbl(uint32_t idh, uint32_t idl) {
// int32_t getfwdtbl(uint64_t id) {
int32_t i;
for (i = 1; i < FWDTBLSIZE; i++) { // (fwdtbl[0] not used)
if (fwdtbl[i].idh == idh && fwdtbl[i].idl == idl) { // already registered
return i;
} else if (fwdtbl[i].idh == 0 && fwdtbl[i].idl == 0) { // not yet registered
return -i;
}
}
return 0; // Table full!
}
/* Fill a forwarding table entry: store the packet's source address (host
 * byte order), the interface it arrived on and the current time in
 * fwdtbl[index]. Returns index. */
int32_t setfwdtbl(EtherPacket *packet, int32_t index, int32_t srcIfnum) {
    struct ForwardingTable *entry = &fwdtbl[index];
    struct timeval stamp;
    struct timezone zone;

    entry->idh = ntohs(packet->srcMAC1);
    entry->idl = ntohl(packet->srcMAC2);
    entry->ifnum = srcIfnum;

    gettimeofday(&stamp, &zone);
    entry->tv_sec = stamp.tv_sec;
    entry->tv_usec = stamp.tv_usec;

    if (displayLevel >= 3) {
        dumpfwdtbl(index, "\nNewly learned:");
    }
    return index;
}
/* Return true iff the forwarding table entry is timed-out, i.e. iff more
 * than timeoutTime microseconds lie between the entry's timestamp and `now`.
 * At displayLevel >= 4 the comparison is also printed. */
int32_t timedOut(struct timeval now, struct ForwardingTable fwdentry,
                 suseconds_t timeoutTime) {
    // Elapsed time minus the timeout, in seconds; positive means expired.
    double sec = (now.tv_sec - fwdentry.tv_sec) +
                 ((double)(now.tv_usec - fwdentry.tv_usec - timeoutTime)) / 1000000;
    int32_t expired = sec > 0;

    if (displayLevel >= 4) {
        const char *verdict = expired ? "Timed out" : "Not timed out";
        printf("%s: now %d.%d fwdtbl %d.%d timeout %d msec %d\n",
               verdict,
               (int32_t)now.tv_sec, (int32_t)now.tv_usec,
               (int32_t)fwdentry.tv_sec, (int32_t)fwdentry.tv_usec,
               (int32_t)timeoutTime, (int32_t)(1000 * sec));
    }
    return expired;
}
// Packet counters defined with the other file-scope statistics variables.
extern int32_t numInput, numOutput;

/**
 * Send one frame out through interface number outIfnum.
 *
 * Returns 1 and counts the frame in numOutput when sendto() succeeds;
 * reports the error with perror() and returns 0 otherwise.
 */
static int32_t forwardSendTo(EtherPacket *packet, ssize_t size, int32_t outIfnum) {
    struct sockaddr_ll sll;
    memset(&sll, 0, sizeof(sll));
    sll.sll_family = AF_PACKET;
    sll.sll_protocol = htons(ETH_P_ALL);
    sll.sll_ifindex = ifindex[outIfnum];

    ssize_t sizeout = sendto(fd[outIfnum], packet, size, 0,
                             (struct sockaddr *)&sll, sizeof(sll));
    if (sizeout < 0) {
        perror("sendto");
        return 0;
    }
    numOutput++;
    if (displayLevel >= 4) {
        printf("%zd bytes sent through eth%d\n", sizeout, outIfnum);
    }
    return 1;
}

/**
 * Forward a packet
 *
 * Learns the source address of the packet, then sends the packet either to
 * the interface on which its destination address was learned, or, when the
 * destination is unknown or its entry is older than ReferenceTimeout, to
 * every interface in FirstInterface..LastInterface except the one it
 * arrived on (flooding). A packet whose destination was learned on the
 * arrival interface is not sent anywhere, since the destination is on the
 * segment the packet came from.
 *
 * @param packet    the received frame
 * @param sizein    size of the frame in bytes
 * @param srcIfnum  interface number the frame arrived on
 * @return          number of interfaces the frame was successfully sent to
 *
 * Terminates the process with exit status 1 when a new source address
 * cannot be learned because the forwarding table is full.
 */
int32_t forward(EtherPacket *packet, ssize_t sizein, int32_t srcIfnum) {
    struct timeval now;
    int32_t numSent = 0;
    int32_t ifnum;

    gettimeofday(&now, NULL); // get current time
    numInput++;

    if (displayLevel >= 3) {
        printPacket(packet, sizein, " ");
    }

    // Learn (or refresh) the source address.
    int32_t index = getfwdtbl(ntohs(packet->srcMAC1), ntohl(packet->srcMAC2));
    if (index == 0) { // Table full!
        // stderr, because _exit() does not flush stdio buffers
        fprintf(stderr, "Forwarding table full - exiting\n");
        _exit(1);
    } else if (index < 0) { // source not yet learned - learn
        setfwdtbl(packet, -index, srcIfnum);
        numLearned++; // 2011-2-23
    } else { // already learned
        if (displayLevel >= 4) dumpfwdtbl(index, "Before:");
        // An entry is only rewritten once it is older than LearningTimeout.
        if (timedOut(now, fwdtbl[index], LearningTimeout)) {
            if (displayLevel >= 3) {
                printf("Updated! if: %d -> %d, timestamp: %d.%d -> %d.%d\n",
                       fwdtbl[index].ifnum, srcIfnum,
                       (int32_t)fwdtbl[index].tv_sec, (int32_t)fwdtbl[index].tv_usec,
                       (int32_t)now.tv_sec, (int32_t)now.tv_usec);
            }
            fwdtbl[index].ifnum = srcIfnum;
            fwdtbl[index].tv_sec = now.tv_sec;
            fwdtbl[index].tv_usec = now.tv_usec;
        }
    }

    // Look up the destination address.
    uint32_t destMAC1 = ntohs(packet->destMAC1);
    uint32_t destMAC2 = ntohl(packet->destMAC2);
    int32_t destIndex = getfwdtbl(destMAC1, destMAC2);

    if (destIndex <= 0 || // next-hop interface not known (or table full)
        timedOut(now, fwdtbl[destIndex], ReferenceTimeout)) { // stale entry
        // flood packet
        if (displayLevel >= 3) {
            printf("@@@@@@@@\n@ FLOOD packet @ (dest=%x:%x)\n@@@@@@@@\n",
                   destMAC1, destMAC2);
        }
        for (ifnum = FirstInterface; ifnum <= LastInterface; ifnum++) {
            if (srcIfnum != ifnum) {
                numSent += forwardSendTo(packet, sizein, ifnum);
            }
        }
        return numSent;
    }

    int32_t destIfnum = fwdtbl[destIndex].ifnum;
    if (destIfnum == srcIfnum) {
        // Destination is reachable through the arrival interface itself:
        // sending the frame back would only duplicate it on that segment.
        if (displayLevel >= 3) {
            printf("@@@@@@@@@\n@ FILTER packet @ (dest=%x:%x) on eth%d\n@@@@@@@@@\n",
                   destMAC1, destMAC2, destIfnum);
        }
        return 0;
    }

    if (displayLevel >= 3) {
        printf("@@@@@@@@@\n@ SWITCH packet @ (dest=%x:%x) to eth%d\n@@@@@@@@@\n",
               destMAC1, destMAC2, destIfnum);
    }
    numSent += forwardSendTo(packet, sizein, destIfnum);
    return numSent;
}
/**
 * Main program
 *
 * Usage: ethernode [FirstInterface [LastInterface [PrintLevel]]]
 *
 * Opens one raw socket per interface in FirstInterface..LastInterface,
 * switches the sockets to non-blocking mode and then polls them forever,
 * passing every received frame to forward(). When displayLevel >= 1, the
 * statistics are printed after a received frame, at most once per second.
 *
 * Returns 1 when the interface range is invalid or a socket cannot be made
 * non-blocking; otherwise it does not return.
 */
int32_t main(int32_t argc, char **argv) {
    // Destination MAC + source MAC + type: anything shorter cannot be a
    // frame that forward() is able to interpret.
    enum { MinFrameSize = 14 };

    unsigned char buf[MAX_PACKET_SIZE];
    int32_t ifnum; // interface number (different from ifindex)
    int32_t argIndex = 0;
    time_t lastTime;

    if (++argIndex < argc) {
        FirstInterface = atoi(argv[argIndex]); // Min interface #
    }
    if (++argIndex < argc) {
        LastInterface = atoi(argv[argIndex]); // Max interface #
    }
    // fd[] and ifindex[] have MaxInterfaces elements, so the largest usable
    // interface number is MaxInterfaces - 1.
    if (FirstInterface < 0 || LastInterface < FirstInterface ||
        MaxInterfaces <= LastInterface) {
        printf("Interface range (%d..%d) must be between 0..%d\n",
               FirstInterface, LastInterface, MaxInterfaces - 1);
        return 1;
    }
    if (LastInterface < FirstInterface + 2) {
        // Warning only: the program still runs with fewer interfaces.
        printf("Three or more interfaces are required for switching\n");
    }
    if (++argIndex < argc) { // Debug print
        displayLevel = atoi(argv[argIndex]);
    }

    // Open raw sockets and initialize for sending packets
    for (ifnum = FirstInterface; ifnum <= LastInterface; ifnum++) {
        fd[ifnum] = open_socket(ifnum, &ifindex[ifnum]);
        // Set non-blocking mode: the polling loop below must never wait on
        // an idle interface while frames are pending on another one.
        int32_t flags = fcntl(fd[ifnum], F_GETFL, 0);
        if (flags == -1 ||
            fcntl(fd[ifnum], F_SETFL, O_NONBLOCK | flags) == -1) {
            perror("fcntl");
            return 1;
        }
    }

    initfwdtbl();
    lastTime = time(NULL);

    // Forwarding operation
    for (;;) {
        for (ifnum = FirstInterface; ifnum <= LastInterface; ifnum++) {
            ssize_t sizein = recv(fd[ifnum], buf, MAX_PACKET_SIZE, 0);
            if (sizein < MinFrameSize) {
                continue; // nothing pending, receive error, or runt frame
            }
            if (displayLevel >= 3) {
                printf("\nReceived through eth%d (%zd bytes)\n", ifnum, sizein);
            }
            forward((EtherPacket *)buf, sizein, ifnum);

            if (displayLevel < 1) {
                continue;
            }
            time_t now = time(NULL);
            if (now < lastTime + 1) {
                continue; // statistics were already printed this second
            }
            printf("\n\n");
            if (numLearned == 1) {
                printf("Learning: Once\n");
            } else if (numLearned == 2) {
                printf("Learning: Twice\n");
            } else { // 0 or more than 2
                printf("Learning: %3d times\n", numLearned);
            }
            if (displayLevel == 2) {
                snapfwdtbl();
            }
            printf("External packets: RX %4d TX %4d\n", numInput, numOutput);
            lastTime = now;
        }
    }
}
ここで使用している Ether.h の内容はつぎのとおりである.
/***
*
* Software-based Ethernet Common Header
*
* Coded by Yasusi Kanada
* Ver 0.2 2011-1-11 Initial version
*
***/
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <stdio.h>
#include <elf.h>
#include <string.h>
// Layout of an Ethernet frame (byte offsets above the fields):
//
// 0       6       12   14
// +-------+-------+----+---------------+
// | DMAC  | SMAC  |Type|    Payload    |
// +-------+-------+----+---------------+
//
// Each 6-byte MAC address is split into a 16-bit part followed by a 32-bit
// part. The structure is packed, so the members follow each other without
// padding. The users of this header convert the address parts with
// ntohs()/ntohl(), i.e. they are kept in network byte order here.
struct _EtherHeader {
uint16_t destMAC1; // destination MAC, first 2 bytes
uint32_t destMAC2; // destination MAC, last 4 bytes
uint16_t srcMAC1; // source MAC, first 2 bytes
uint32_t srcMAC2; // source MAC, last 4 bytes
#ifdef VLAN
uint32_t VLANTag; // 4-byte tag field, present only when VLAN is defined
#endif
uint16_t type; // the "Type" field of the diagram above
int32_t payload; // first 4 bytes of the payload; NOTE(review): presumably only marks where the payload starts - confirm
} __attribute__((packed));
// A received frame buffer is accessed through this type.
typedef struct _EtherHeader EtherPacket;
