* interface as the means of communication with the user level.
*
* The IP fragmentation functionality.
- *
+ *
* Version: $Id: ip_fragment.c,v 1.59 2002/01/12 07:54:56 davem Exp $
*
* Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
spinlock_t lock;
atomic_t refcnt;
struct timer_list timer; /* when will this queue expire? */
- struct timeval stamp;
+ ktime_t stamp;
int iif;
unsigned int rid;
struct inet_peer *peer;
static LIST_HEAD(ipq_lru_list);
int ip_frag_nqueues = 0;
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+ struct net_device *dev);
+
static __inline__ void __ipq_unlink(struct ipq *qp)
{
hlist_del(&qp->list);
{
struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
- if(!qp)
+ if (!qp)
return NULL;
atomic_add(sizeof(struct ipq), &ip_frag_mem);
return qp;
}
}
-/* Memory limiting on fragments. Evictor trashes the oldest
+/* Memory limiting on fragments. Evictor trashes the oldest
* fragment queue until we are back under the threshold.
*/
static void ip_evictor(void)
if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) {
struct sk_buff *head = qp->fragments;
/* Send an ICMP "Fragment Reassembly Timeout" message. */
- if ((head->dev = dev_get_by_index(qp->iif)) != NULL) {
+ if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) {
icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
dev_put(head->dev);
}
* promoted read lock to write lock.
*/
hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
- if(qp->id == qp_in->id &&
- qp->saddr == qp_in->saddr &&
- qp->daddr == qp_in->daddr &&
- qp->protocol == qp_in->protocol &&
- qp->user == qp_in->user) {
+ if (qp->id == qp_in->id &&
+ qp->saddr == qp_in->saddr &&
+ qp->daddr == qp_in->daddr &&
+ qp->protocol == qp_in->protocol &&
+ qp->user == qp_in->user) {
atomic_inc(&qp->refcnt);
write_unlock(&ipfrag_lock);
qp_in->last_in |= COMPLETE;
read_lock(&ipfrag_lock);
hash = ipqhashfn(id, saddr, daddr, protocol);
hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
- if(qp->id == id &&
- qp->saddr == saddr &&
- qp->daddr == daddr &&
- qp->protocol == protocol &&
- qp->user == user) {
+ if (qp->id == id &&
+ qp->saddr == saddr &&
+ qp->daddr == daddr &&
+ qp->protocol == protocol &&
+ qp->user == user) {
atomic_inc(&qp->refcnt);
read_unlock(&ipfrag_lock);
return qp;
}
/* Add new segment to existing queue. */
-static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
struct sk_buff *prev, *next;
+ struct net_device *dev;
int flags, offset;
int ihl, end;
+ int err = -ENOENT;
if (qp->last_in & COMPLETE)
goto err;
if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
- unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) {
+ unlikely(ip_frag_too_far(qp)) &&
+ unlikely(err = ip_frag_reinit(qp))) {
ipq_kill(qp);
goto err;
}
- offset = ntohs(skb->nh.iph->frag_off);
+ offset = ntohs(ip_hdr(skb)->frag_off);
flags = offset & ~IP_OFFSET;
offset &= IP_OFFSET;
offset <<= 3; /* offset is in 8-byte chunks */
- ihl = skb->nh.iph->ihl * 4;
+ ihl = ip_hdrlen(skb);
/* Determine the position of this fragment. */
- end = offset + skb->len - ihl;
+ end = offset + skb->len - ihl;
+ err = -EINVAL;
/* Is this the final fragment? */
if ((flags & IP_MF) == 0) {
if (end == offset)
goto err;
+ err = -ENOMEM;
if (pskb_pull(skb, ihl) == NULL)
goto err;
- if (pskb_trim_rcsum(skb, end-offset))
+
+ err = pskb_trim_rcsum(skb, end - offset);
+ if (err)
goto err;
/* Find out which fragments are in front and at the back of us
* this fragment, right?
*/
prev = NULL;
- for(next = qp->fragments; next != NULL; next = next->next) {
+ for (next = qp->fragments; next != NULL; next = next->next) {
if (FRAG_CB(next)->offset >= offset)
break; /* bingo! */
prev = next;
if (i > 0) {
offset += i;
+ err = -EINVAL;
if (end <= offset)
goto err;
+ err = -ENOMEM;
if (!pskb_pull(skb, i))
goto err;
if (skb->ip_summed != CHECKSUM_UNNECESSARY)
}
}
+ err = -ENOMEM;
+
while (next && FRAG_CB(next)->offset < end) {
int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
else
qp->fragments = skb;
- if (skb->dev)
- qp->iif = skb->dev->ifindex;
- skb->dev = NULL;
- skb_get_timestamp(skb, &qp->stamp);
+ dev = skb->dev;
+ if (dev) {
+ qp->iif = dev->ifindex;
+ skb->dev = NULL;
+ }
+ qp->stamp = skb->tstamp;
qp->meat += skb->len;
atomic_add(skb->truesize, &ip_frag_mem);
if (offset == 0)
qp->last_in |= FIRST_IN;
+ if (qp->last_in == (FIRST_IN | LAST_IN) && qp->meat == qp->len)
+ return ip_frag_reasm(qp, prev, dev);
+
write_lock(&ipfrag_lock);
list_move_tail(&qp->lru_list, &ipq_lru_list);
write_unlock(&ipfrag_lock);
-
- return;
+ return -EINPROGRESS;
err:
kfree_skb(skb);
+ return err;
}
/* Build a new IP datagram from all its fragments. */
-static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+ struct net_device *dev)
{
struct iphdr *iph;
struct sk_buff *fp, *head = qp->fragments;
int len;
int ihlen;
+ int err;
ipq_kill(qp);
+ /* Make the one we just received the head. */
+ if (prev) {
+ head = prev->next;
+ fp = skb_clone(head, GFP_ATOMIC);
+
+ if (!fp)
+ goto out_nomem;
+
+ fp->next = head->next;
+ prev->next = fp;
+
+ skb_morph(head, qp->fragments);
+ head->next = qp->fragments->next;
+
+ kfree_skb(qp->fragments);
+ qp->fragments = head;
+ }
+
BUG_TRAP(head != NULL);
BUG_TRAP(FRAG_CB(head)->offset == 0);
/* Allocate a new buffer for the datagram. */
- ihlen = head->nh.iph->ihl*4;
+ ihlen = ip_hdrlen(head);
len = ihlen + qp->len;
- if(len > 65535)
+ err = -E2BIG;
+ if (len > 65535)
goto out_oversize;
/* Head of list must not be cloned. */
+ err = -ENOMEM;
if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
goto out_nomem;
}
skb_shinfo(head)->frag_list = head->next;
- skb_push(head, head->data - head->nh.raw);
+ skb_push(head, head->data - skb_network_header(head));
atomic_sub(head->truesize, &ip_frag_mem);
for (fp=head->next; fp; fp = fp->next) {
head->next = NULL;
head->dev = dev;
- skb_set_timestamp(head, &qp->stamp);
+ head->tstamp = qp->stamp;
- iph = head->nh.iph;
+ iph = ip_hdr(head);
iph->frag_off = 0;
iph->tot_len = htons(len);
IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
qp->fragments = NULL;
- return head;
+ return 0;
out_nomem:
- LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
+ LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
"queue %p\n", qp);
goto out_fail;
out_oversize:
NIPQUAD(qp->saddr));
out_fail:
IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
- return NULL;
+ return err;
}
/* Process an incoming IP datagram fragment. */
struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
{
- struct iphdr *iph = skb->nh.iph;
struct ipq *qp;
- struct net_device *dev;
-
+
IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
/* Start by cleaning up the memory. */
if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh)
ip_evictor();
- dev = skb->dev;
-
/* Lookup (or create) queue header */
- if ((qp = ip_find(iph, user)) != NULL) {
- struct sk_buff *ret = NULL;
+ if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
+ int ret;
spin_lock(&qp->lock);
- ip_frag_queue(qp, skb);
-
- if (qp->last_in == (FIRST_IN|LAST_IN) &&
- qp->meat == qp->len)
- ret = ip_frag_reasm(qp, dev);
+ ret = ip_frag_queue(qp, skb);
spin_unlock(&qp->lock);
ipq_put(qp, NULL);
- return ret;
+ return ret ? NULL : skb;
}
IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
return NULL;
}
-void ipfrag_init(void)
+void __init ipfrag_init(void)
{
ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
(jiffies ^ (jiffies >> 6)));