~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux-2.6.16/net/packet/af_packet.c

Version: ~ [ 2.6.16 ] ~ [ 2.6.17 ] ~
Architecture: ~ [ ia64 ] ~ [ i386 ] ~ [ arm ] ~ [ ppc ] ~ [ sparc64 ] ~

  1 /*
  2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
  3  *              operating system.  INET is implemented using the  BSD Socket
  4  *              interface as the means of communication with the user level.
  5  *
  6  *              PACKET - implements raw packet sockets.
  7  *
  8  * Version:     $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
  9  *
 10  * Authors:     Ross Biro
 11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 13  *
 14  * Fixes:       
 15  *              Alan Cox        :       verify_area() now used correctly
 16  *              Alan Cox        :       new skbuff lists, look ma no backlogs!
 17  *              Alan Cox        :       tidied skbuff lists.
 18  *              Alan Cox        :       Now uses generic datagram routines I
 19  *                                      added. Also fixed the peek/read crash
 20  *                                      from all old Linux datagram code.
 21  *              Alan Cox        :       Uses the improved datagram code.
 22  *              Alan Cox        :       Added NULL's for socket options.
 23  *              Alan Cox        :       Re-commented the code.
 24  *              Alan Cox        :       Use new kernel side addressing
 25  *              Rob Janssen     :       Correct MTU usage.
 26  *              Dave Platt      :       Counter leaks caused by incorrect
 27  *                                      interrupt locking and some slightly
 28  *                                      dubious gcc output. Can you read
 29  *                                      compiler: it said _VOLATILE_
 30  *      Richard Kooijman        :       Timestamp fixes.
 31  *              Alan Cox        :       New buffers. Use sk->mac.raw.
 32  *              Alan Cox        :       sendmsg/recvmsg support.
 33  *              Alan Cox        :       Protocol setting support
 34  *      Alexey Kuznetsov        :       Untied from IPv4 stack.
 35  *      Cyrus Durgin            :       Fixed kerneld for kmod.
 36  *      Michal Ostrowski        :       Module initialization cleanup.
 37  *         Ulises Alonso        :       Frame number limit removal and 
 38  *                                      packet_set_ring memory leak.
 39  *              Eric Biederman  :       Allow for > 8 byte hardware addresses.
 40  *                                      The convention is that longer addresses
 41  *                                      will simply extend the hardware address
 42  *                                      byte arrays at the end of sockaddr_ll 
 43  *                                      and packet_mreq.
 44  *
 45  *              This program is free software; you can redistribute it and/or
 46  *              modify it under the terms of the GNU General Public License
 47  *              as published by the Free Software Foundation; either version
 48  *              2 of the License, or (at your option) any later version.
 49  *
 50  */
 51  
 52 #include <linux/config.h>
 53 #include <linux/types.h>
 54 #include <linux/sched.h>
 55 #include <linux/mm.h>
 56 #include <linux/capability.h>
 57 #include <linux/fcntl.h>
 58 #include <linux/socket.h>
 59 #include <linux/in.h>
 60 #include <linux/inet.h>
 61 #include <linux/netdevice.h>
 62 #include <linux/if_packet.h>
 63 #include <linux/wireless.h>
 64 #include <linux/kmod.h>
 65 #include <net/ip.h>
 66 #include <net/protocol.h>
 67 #include <linux/skbuff.h>
 68 #include <net/sock.h>
 69 #include <linux/errno.h>
 70 #include <linux/timer.h>
 71 #include <asm/system.h>
 72 #include <asm/uaccess.h>
 73 #include <asm/ioctls.h>
 74 #include <asm/page.h>
 75 #include <asm/io.h>
 76 #include <linux/proc_fs.h>
 77 #include <linux/seq_file.h>
 78 #include <linux/poll.h>
 79 #include <linux/module.h>
 80 #include <linux/init.h>
 81 
 82 #ifdef CONFIG_INET
 83 #include <net/inet_common.h>
 84 #endif
 85 
 86 #define CONFIG_SOCK_PACKET      1
 87 
 88 /*
 89    Proposed replacement for SIOC{ADD,DEL}MULTI and
 90    IFF_PROMISC, IFF_ALLMULTI flags.
 91 
 92    It is more expensive, but I believe,
 93    it is really the correct solution: reentrant, safe and fault tolerant.
 94 
 95    IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping
 96    reference count and global flag, so that real status is
 97    (gflag|(count != 0)), so that we can use obsolete faulty interface
 98    not harming clever users.
 99  */
100 #define CONFIG_PACKET_MULTICAST 1
101 
102 /*
103    Assumptions:
104    - if device has no dev->hard_header routine, it adds and removes ll header
105      inside itself. In this case ll header is invisible outside of device,
106      but higher levels still should reserve dev->hard_header_len.
107      Some devices are enough clever to reallocate skb, when header
108      will not fit to reserved space (tunnel), another ones are silly
109      (PPP).
110    - packet socket receives packets with pulled ll header,
111      so that SOCK_RAW should push it back.
112 
113 On receive:
114 -----------
115 
116 Incoming, dev->hard_header!=NULL
117    mac.raw -> ll header
118    data    -> data
119 
120 Outgoing, dev->hard_header!=NULL
121    mac.raw -> ll header
122    data    -> ll header
123 
124 Incoming, dev->hard_header==NULL
125    mac.raw -> UNKNOWN position. It is very likely, that it points to ll header.
126               PPP does this, which is wrong because it introduces asymmetry
127               between rx and tx paths.
128    data    -> data
129 
130 Outgoing, dev->hard_header==NULL
131    mac.raw -> data. ll header is still not built!
132    data    -> data
133 
134 Resume
135   If dev->hard_header==NULL we are unlikely to restore sensible ll header.
136 
137 
138 On transmit:
139 ------------
140 
141 dev->hard_header != NULL
142    mac.raw -> ll header
143    data    -> ll header
144 
145 dev->hard_header == NULL (ll header is added by device, we cannot control it)
146    mac.raw -> data
147    data -> data
148 
149    We should set nh.raw on output to the correct position,
150    packet classifier depends on it.
151  */
152 
153 /* List of all packet sockets. */
154 static HLIST_HEAD(packet_sklist);
155 static DEFINE_RWLOCK(packet_sklist_lock);
156 
157 static atomic_t packet_socks_nr;
158 
159 
160 /* Private packet socket structures. */
161 
162 #ifdef CONFIG_PACKET_MULTICAST
163 struct packet_mclist
164 {
165         struct packet_mclist    *next;
166         int                     ifindex;
167         int                     count;
168         unsigned short          type;
169         unsigned short          alen;
170         unsigned char           addr[MAX_ADDR_LEN];
171 };
172 /* identical to struct packet_mreq except it has
173  * a longer address field.
174  */
175 struct packet_mreq_max
176 {
177         int             mr_ifindex;
178         unsigned short  mr_type;
179         unsigned short  mr_alen;
180         unsigned char   mr_address[MAX_ADDR_LEN];
181 };
182 #endif
183 #ifdef CONFIG_PACKET_MMAP
184 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
185 #endif
186 
187 static void packet_flush_mclist(struct sock *sk);
188 
189 struct packet_sock {
190         /* struct sock has to be the first member of packet_sock */
191         struct sock             sk;
192         struct tpacket_stats    stats;
193 #ifdef CONFIG_PACKET_MMAP
194         char *                  *pg_vec;
195         unsigned int            head;
196         unsigned int            frames_per_block;
197         unsigned int            frame_size;
198         unsigned int            frame_max;
199         int                     copy_thresh;
200 #endif
201         struct packet_type      prot_hook;
202         spinlock_t              bind_lock;
203         char                    running;        /* prot_hook is attached*/
204         int                     ifindex;        /* bound device         */
205         unsigned short          num;
206 #ifdef CONFIG_PACKET_MULTICAST
207         struct packet_mclist    *mclist;
208 #endif
209 #ifdef CONFIG_PACKET_MMAP
210         atomic_t                mapped;
211         unsigned int            pg_vec_order;
212         unsigned int            pg_vec_pages;
213         unsigned int            pg_vec_len;
214 #endif
215 };
216 
217 #ifdef CONFIG_PACKET_MMAP
218 
219 static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position)
220 {
221         unsigned int pg_vec_pos, frame_offset;
222         char *frame;
223 
224         pg_vec_pos = position / po->frames_per_block;
225         frame_offset = position % po->frames_per_block;
226 
227         frame = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
228         
229         return frame;
230 }
231 #endif
232 
/*
 * Convert a generic socket pointer into the packet socket that embeds it.
 * Valid only because struct sock is the first member of struct packet_sock.
 */
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
        struct packet_sock *po = (struct packet_sock *)sk;

        return po;
}
237 
238 static void packet_sock_destruct(struct sock *sk)
239 {
240         BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
241         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
242 
243         if (!sock_flag(sk, SOCK_DEAD)) {
244                 printk("Attempt to release alive packet socket: %p\n", sk);
245                 return;
246         }
247 
248         atomic_dec(&packet_socks_nr);
249 #ifdef PACKET_REFCNT_DEBUG
250         printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
251 #endif
252 }
253 
254 
255 static const struct proto_ops packet_ops;
256 
257 #ifdef CONFIG_SOCK_PACKET
258 static const struct proto_ops packet_ops_spkt;
259 
260 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt, struct net_device *orig_dev)
261 {
262         struct sock *sk;
263         struct sockaddr_pkt *spkt;
264 
265         /*
266          *      When we registered the protocol we saved the socket in the data
267          *      field for just this event.
268          */
269 
270         sk = pt->af_packet_priv;
271         
272         /*
273          *      Yank back the headers [hope the device set this
274          *      right or kerboom...]
275          *
276          *      Incoming packets have ll header pulled,
277          *      push it back.
278          *
279          *      For outgoing ones skb->data == skb->mac.raw
280          *      so that this procedure is noop.
281          */
282 
283         if (skb->pkt_type == PACKET_LOOPBACK)
284                 goto out;
285 
286         if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
287                 goto oom;
288 
289         /* drop any routing info */
290         dst_release(skb->dst);
291         skb->dst = NULL;
292 
293         /* drop conntrack reference */
294         nf_reset(skb);
295 
296         spkt = (struct sockaddr_pkt*)skb->cb;
297 
298         skb_push(skb, skb->data-skb->mac.raw);
299 
300         /*
301          *      The SOCK_PACKET socket receives _all_ frames.
302          */
303 
304         spkt->spkt_family = dev->type;
305         strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
306         spkt->spkt_protocol = skb->protocol;
307 
308         /*
309          *      Charge the memory to the socket. This is done specifically
310          *      to prevent sockets using all the memory up.
311          */
312 
313         if (sock_queue_rcv_skb(sk,skb) == 0)
314                 return 0;
315 
316 out:
317         kfree_skb(skb);
318 oom:
319         return 0;
320 }
321 
322 
323 /*
324  *      Output a raw packet to a device layer. This bypasses all the other
325  *      protocol layers and you must therefore supply it with a complete frame
326  */
327  
328 static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
329                                struct msghdr *msg, size_t len)
330 {
331         struct sock *sk = sock->sk;
332         struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
333         struct sk_buff *skb;
334         struct net_device *dev;
335         unsigned short proto=0;
336         int err;
337         
338         /*
339          *      Get and verify the address. 
340          */
341 
342         if (saddr)
343         {
344                 if (msg->msg_namelen < sizeof(struct sockaddr))
345                         return(-EINVAL);
346                 if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
347                         proto=saddr->spkt_protocol;
348         }
349         else
350                 return(-ENOTCONN);      /* SOCK_PACKET must be sent giving an address */
351 
352         /*
353          *      Find the device first to size check it 
354          */
355 
356         saddr->spkt_device[13] = 0;
357         dev = dev_get_by_name(saddr->spkt_device);
358         err = -ENODEV;
359         if (dev == NULL)
360                 goto out_unlock;
361         
362         /*
363          *      You may not queue a frame bigger than the mtu. This is the lowest level
364          *      raw protocol and you must do your own fragmentation at this level.
365          */
366          
367         err = -EMSGSIZE;
368         if (len > dev->mtu + dev->hard_header_len)
369                 goto out_unlock;
370 
371         err = -ENOBUFS;
372         skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
373 
374         /*
375          *      If the write buffer is full, then tough. At this level the user gets to
376          *      deal with the problem - do your own algorithmic backoffs. That's far
377          *      more flexible.
378          */
379          
380         if (skb == NULL) 
381                 goto out_unlock;
382 
383         /*
384          *      Fill it in 
385          */
386          
387         /* FIXME: Save some space for broken drivers that write a
388          * hard header at transmission time by themselves. PPP is the
389          * notable one here. This should really be fixed at the driver level.
390          */
391         skb_reserve(skb, LL_RESERVED_SPACE(dev));
392         skb->nh.raw = skb->data;
393 
394         /* Try to align data part correctly */
395         if (dev->hard_header) {
396                 skb->data -= dev->hard_header_len;
397                 skb->tail -= dev->hard_header_len;
398                 if (len < dev->hard_header_len)
399                         skb->nh.raw = skb->data;
400         }
401 
402         /* Returns -EFAULT on error */
403         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
404         skb->protocol = proto;
405         skb->dev = dev;
406         skb->priority = sk->sk_priority;
407         if (err)
408                 goto out_free;
409 
410         err = -ENETDOWN;
411         if (!(dev->flags & IFF_UP))
412                 goto out_free;
413 
414         /*
415          *      Now send it
416          */
417 
418         dev_queue_xmit(skb);
419         dev_put(dev);
420         return(len);
421 
422 out_free:
423         kfree_skb(skb);
424 out_unlock:
425         if (dev)
426                 dev_put(dev);
427         return err;
428 }
429 #endif
430 
431 static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res)
432 {
433         struct sk_filter *filter;
434 
435         bh_lock_sock(sk);
436         filter = sk->sk_filter;
437         /*
438          * Our caller already checked that filter != NULL but we need to
439          * verify that under bh_lock_sock() to be safe
440          */
441         if (likely(filter != NULL))
442                 res = sk_run_filter(skb, filter->insns, filter->len);
443         bh_unlock_sock(sk);
444 
445         return res;
446 }
447 
448 /*
449    This function makes lazy skb cloning in hope that most of packets
450    are discarded by BPF.
451 
452    Note tricky part: we DO mangle shared skb! skb->data, skb->len
453    and skb->cb are mangled. It works because (and until) packets
454    falling here are owned by current CPU. Output packets are cloned
455    by dev_queue_xmit_nit(), input packets are processed by net_bh
456    sequencially, so that if we return skb to original state on exit,
457    we will not harm anyone.
458  */
459 
460 static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
461 {
462         struct sock *sk;
463         struct sockaddr_ll *sll;
464         struct packet_sock *po;
465         u8 * skb_head = skb->data;
466         int skb_len = skb->len;
467         unsigned snaplen;
468 
469         if (skb->pkt_type == PACKET_LOOPBACK)
470                 goto drop;
471 
472         sk = pt->af_packet_priv;
473         po = pkt_sk(sk);
474 
475         skb->dev = dev;
476 
477         if (dev->hard_header) {
478                 /* The device has an explicit notion of ll header,
479                    exported to higher levels.
480 
481                    Otherwise, the device hides datails of it frame
482                    structure, so that corresponding packet head
483                    never delivered to user.
484                  */
485                 if (sk->sk_type != SOCK_DGRAM)
486                         skb_push(skb, skb->data - skb->mac.raw);
487                 else if (skb->pkt_type == PACKET_OUTGOING) {
488                         /* Special case: outgoing packets have ll header at head */
489                         skb_pull(skb, skb->nh.raw - skb->data);
490                 }
491         }
492 
493         snaplen = skb->len;
494 
495         if (sk->sk_filter) {
496                 unsigned res = run_filter(skb, sk, snaplen);
497                 if (res == 0)
498                         goto drop_n_restore;
499                 if (snaplen > res)
500                         snaplen = res;
501         }
502 
503         if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
504             (unsigned)sk->sk_rcvbuf)
505                 goto drop_n_acct;
506 
507         if (skb_shared(skb)) {
508                 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
509                 if (nskb == NULL)
510                         goto drop_n_acct;
511 
512                 if (skb_head != skb->data) {
513                         skb->data = skb_head;
514                         skb->len = skb_len;
515                 }
516                 kfree_skb(skb);
517                 skb = nskb;
518         }
519 
520         sll = (struct sockaddr_ll*)skb->cb;
521         sll->sll_family = AF_PACKET;
522         sll->sll_hatype = dev->type;
523         sll->sll_protocol = skb->protocol;
524         sll->sll_pkttype = skb->pkt_type;
525         sll->sll_ifindex = dev->ifindex;
526         sll->sll_halen = 0;
527 
528         if (dev->hard_header_parse)
529                 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
530 
531         if (pskb_trim(skb, snaplen))
532                 goto drop_n_acct;
533 
534         skb_set_owner_r(skb, sk);
535         skb->dev = NULL;
536         dst_release(skb->dst);
537         skb->dst = NULL;
538 
539         /* drop conntrack reference */
540         nf_reset(skb);
541 
542         spin_lock(&sk->sk_receive_queue.lock);
543         po->stats.tp_packets++;
544         __skb_queue_tail(&sk->sk_receive_queue, skb);
545         spin_unlock(&sk->sk_receive_queue.lock);
546         sk->sk_data_ready(sk, skb->len);
547         return 0;
548 
549 drop_n_acct:
550         spin_lock(&sk->sk_receive_queue.lock);
551         po->stats.tp_drops++;
552         spin_unlock(&sk->sk_receive_queue.lock);
553 
554 drop_n_restore:
555         if (skb_head != skb->data && skb_shared(skb)) {
556                 skb->data = skb_head;
557                 skb->len = skb_len;
558         }
559 drop:
560         kfree_skb(skb);
561         return 0;
562 }
563 
564 #ifdef CONFIG_PACKET_MMAP
565 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
566 {
567         struct sock *sk;
568         struct packet_sock *po;
569         struct sockaddr_ll *sll;
570         struct tpacket_hdr *h;
571         u8 * skb_head = skb->data;
572         int skb_len = skb->len;
573         unsigned snaplen;
574         unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
575         unsigned short macoff, netoff;
576         struct sk_buff *copy_skb = NULL;
577 
578         if (skb->pkt_type == PACKET_LOOPBACK)
579                 goto drop;
580 
581         sk = pt->af_packet_priv;
582         po = pkt_sk(sk);
583 
584         if (dev->hard_header) {
585                 if (sk->sk_type != SOCK_DGRAM)
586                         skb_push(skb, skb->data - skb->mac.raw);
587                 else if (skb->pkt_type == PACKET_OUTGOING) {
588                         /* Special case: outgoing packets have ll header at head */
589                         skb_pull(skb, skb->nh.raw - skb->data);
590                         if (skb->ip_summed == CHECKSUM_HW)
591                                 status |= TP_STATUS_CSUMNOTREADY;
592                 }
593         }
594 
595         snaplen = skb->len;
596 
597         if (sk->sk_filter) {
598                 unsigned res = run_filter(skb, sk, snaplen);
599                 if (res == 0)
600                         goto drop_n_restore;
601                 if (snaplen > res)
602                         snaplen = res;
603         }
604 
605         if (sk->sk_type == SOCK_DGRAM) {
606                 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
607         } else {
608                 unsigned maclen = skb->nh.raw - skb->data;
609                 netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
610                 macoff = netoff - maclen;
611         }
612 
613         if (macoff + snaplen > po->frame_size) {
614                 if (po->copy_thresh &&
615                     atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
616                     (unsigned)sk->sk_rcvbuf) {
617                         if (skb_shared(skb)) {
618                                 copy_skb = skb_clone(skb, GFP_ATOMIC);
619                         } else {
620                                 copy_skb = skb_get(skb);
621                                 skb_head = skb->data;
622                         }
623                         if (copy_skb)
624                                 skb_set_owner_r(copy_skb, sk);
625                 }
626                 snaplen = po->frame_size - macoff;
627                 if ((int)snaplen < 0)
628                         snaplen = 0;
629         }
630         if (snaplen > skb->len-skb->data_len)
631                 snaplen = skb->len-skb->data_len;
632 
633         spin_lock(&sk->sk_receive_queue.lock);
634         h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
635         
636         if (h->tp_status)
637                 goto ring_is_full;
638         po->head = po->head != po->frame_max ? po->head+1 : 0;
639         po->stats.tp_packets++;
640         if (copy_skb) {
641                 status |= TP_STATUS_COPY;
642                 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
643         }
644         if (!po->stats.tp_drops)
645                 status &= ~TP_STATUS_LOSING;
646         spin_unlock(&sk->sk_receive_queue.lock);
647 
648         memcpy((u8*)h + macoff, skb->data, snaplen);
649 
650         h->tp_len = skb->len;
651         h->tp_snaplen = snaplen;
652         h->tp_mac = macoff;
653         h->tp_net = netoff;
654         if (skb->tstamp.off_sec == 0) { 
655                 __net_timestamp(skb);
656                 sock_enable_timestamp(sk);
657         }
658         h->tp_sec = skb->tstamp.off_sec;
659         h->tp_usec = skb->tstamp.off_usec;
660 
661         sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
662         sll->sll_halen = 0;
663         if (dev->hard_header_parse)
664                 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
665         sll->sll_family = AF_PACKET;
666         sll->sll_hatype = dev->type;
667         sll->sll_protocol = skb->protocol;
668         sll->sll_pkttype = skb->pkt_type;
669         sll->sll_ifindex = dev->ifindex;
670 
671         h->tp_status = status;
672         mb();
673 
674         {
675                 struct page *p_start, *p_end;
676                 u8 *h_end = (u8 *)h + macoff + snaplen - 1;
677 
678                 p_start = virt_to_page(h);
679                 p_end = virt_to_page(h_end);
680                 while (p_start <= p_end) {
681                         flush_dcache_page(p_start);
682                         p_start++;
683                 }
684         }
685 
686         sk->sk_data_ready(sk, 0);
687 
688 drop_n_restore:
689         if (skb_head != skb->data && skb_shared(skb)) {
690                 skb->data = skb_head;
691                 skb->len = skb_len;
692         }
693 drop:
694         kfree_skb(skb);
695         return 0;
696 
697 ring_is_full:
698         po->stats.tp_drops++;
699         spin_unlock(&sk->sk_receive_queue.lock);
700 
701         sk->sk_data_ready(sk, 0);
702         if (copy_skb)
703                 kfree_skb(copy_skb);
704         goto drop_n_restore;
705 }
706 
707 #endif
708 
709 
710 static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
711                           struct msghdr *msg, size_t len)
712 {
713         struct sock *sk = sock->sk;
714         struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
715         struct sk_buff *skb;
716         struct net_device *dev;
717         unsigned short proto;
718         unsigned char *addr;
719         int ifindex, err, reserve = 0;
720 
721         /*
722          *      Get and verify the address. 
723          */
724          
725         if (saddr == NULL) {
726                 struct packet_sock *po = pkt_sk(sk);
727 
728                 ifindex = po->ifindex;
729                 proto   = po->num;
730                 addr    = NULL;
731         } else {
732                 err = -EINVAL;
733                 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
734                         goto out;
735                 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
736                         goto out;
737                 ifindex = saddr->sll_ifindex;
738                 proto   = saddr->sll_protocol;
739                 addr    = saddr->sll_addr;
740         }
741 
742 
743         dev = dev_get_by_index(ifindex);
744         err = -ENXIO;
745         if (dev == NULL)
746                 goto out_unlock;
747         if (sock->type == SOCK_RAW)
748                 reserve = dev->hard_header_len;
749 
750         err = -EMSGSIZE;
751         if (len > dev->mtu+reserve)
752                 goto out_unlock;
753 
754         skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
755                                 msg->msg_flags & MSG_DONTWAIT, &err);
756         if (skb==NULL)
757                 goto out_unlock;
758 
759         skb_reserve(skb, LL_RESERVED_SPACE(dev));
760         skb->nh.raw = skb->data;
761 
762         if (dev->hard_header) {
763                 int res;
764                 err = -EINVAL;
765                 res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
766                 if (sock->type != SOCK_DGRAM) {
767                         skb->tail = skb->data;
768                         skb->len = 0;
769                 } else if (res < 0)
770                         goto out_free;
771         }
772 
773         /* Returns -EFAULT on error */
774         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
775         if (err)
776                 goto out_free;
777 
778         skb->protocol = proto;
779         skb->dev = dev;
780         skb->priority = sk->sk_priority;
781 
782         err = -ENETDOWN;
783         if (!(dev->flags & IFF_UP))
784                 goto out_free;
785 
786         /*
787          *      Now send it
788          */
789 
790         err = dev_queue_xmit(skb);
791         if (err > 0 && (err = net_xmit_errno(err)) != 0)
792                 goto out_unlock;
793 
794         dev_put(dev);
795 
796         return(len);
797 
798 out_free:
799         kfree_skb(skb);
800 out_unlock:
801         if (dev)
802                 dev_put(dev);
803 out:
804         return err;
805 }
806 
807 /*
808  *      Close a PACKET socket. This is fairly simple. We immediately go
809  *      to 'closed' state and remove our protocol entry in the device list.
810  */
811 
812 static int packet_release(struct socket *sock)
813 {
814         struct sock *sk = sock->sk;
815         struct packet_sock *po;
816 
817         if (!sk)
818                 return 0;
819 
820         po = pkt_sk(sk);
821 
822         write_lock_bh(&packet_sklist_lock);
823         sk_del_node_init(sk);
824         write_unlock_bh(&packet_sklist_lock);
825 
826         /*
827          *      Unhook packet receive handler.
828          */
829 
830         if (po->running) {
831                 /*
832                  *      Remove the protocol hook
833                  */
834                 dev_remove_pack(&po->prot_hook);
835                 po->running = 0;
836                 po->num = 0;
837                 __sock_put(sk);
838         }
839 
840 #ifdef CONFIG_PACKET_MULTICAST
841         packet_flush_mclist(sk);
842 #endif
843 
844 #ifdef CONFIG_PACKET_MMAP
845         if (po->pg_vec) {
846                 struct tpacket_req req;
847                 memset(&req, 0, sizeof(req));
848                 packet_set_ring(sk, &req, 1);
849         }
850 #endif
851 
852         /*
853          *      Now the socket is dead. No more input will appear.
854          */
855 
856         sock_orphan(sk);
857         sock->sk = NULL;
858 
859         /* Purge queues */
860 
861         skb_queue_purge(&sk->sk_receive_queue);
862 
863         sock_put(sk);
864         return 0;
865 }
866 
867 /*
868  *      Attach a packet hook.
869  */
870 
871 static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
872 {
873         struct packet_sock *po = pkt_sk(sk);
874         /*
875          *      Detach an existing hook if present.
876          */
877 
878         lock_sock(sk);
879 
880         spin_lock(&po->bind_lock);
881         if (po->running) {
882                 __sock_put(sk);
883                 po->running = 0;
884                 po->num = 0;
885                 spin_unlock(&po->bind_lock);
886                 dev_remove_pack(&po->prot_hook);
887                 spin_lock(&po->bind_lock);
888         }
889 
890         po->num = protocol;
891         po->prot_hook.type = protocol;
892         po->prot_hook.dev = dev;
893 
894         po->ifindex = dev ? dev->ifindex : 0;
895 
896         if (protocol == 0)
897                 goto out_unlock;
898 
899         if (dev) {
900                 if (dev->flags&IFF_UP) {
901                         dev_add_pack(&po->prot_hook);
902                         sock_hold(sk);
903                         po->running = 1;
904                 } else {
905                         sk->sk_err = ENETDOWN;
906                         if (!sock_flag(sk, SOCK_DEAD))
907                                 sk->sk_error_report(sk);
908                 }
909         } else {
910                 dev_add_pack(&po->prot_hook);
911                 sock_hold(sk);
912                 po->running = 1;
913         }
914 
915 out_unlock:
916         spin_unlock(&po->bind_lock);
917         release_sock(sk);
918         return 0;
919 }
920 
921 /*
922  *      Bind a packet socket to a device
923  */
924 
925 #ifdef CONFIG_SOCK_PACKET
926 
927 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
928 {
929         struct sock *sk=sock->sk;
930         char name[15];
931         struct net_device *dev;
932         int err = -ENODEV;
933         
934         /*
935          *      Check legality
936          */
937          
938         if (addr_len != sizeof(struct sockaddr))
939                 return -EINVAL;
940         strlcpy(name,uaddr->sa_data,sizeof(name));
941 
942         dev = dev_get_by_name(name);
943         if (dev) {
944                 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
945                 dev_put(dev);
946         }
947         return err;
948 }
949 #endif
950 
951 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
952 {
953         struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
954         struct sock *sk=sock->sk;
955         struct net_device *dev = NULL;
956         int err;
957 
958 
959         /*
960          *      Check legality
961          */
962          
963         if (addr_len < sizeof(struct sockaddr_ll))
964                 return -EINVAL;
965         if (sll->sll_family != AF_PACKET)
966                 return -EINVAL;
967 
968         if (sll->sll_ifindex) {
969                 err = -ENODEV;
970                 dev = dev_get_by_index(sll->sll_ifindex);
971                 if (dev == NULL)
972                         goto out;
973         }
974         err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
975         if (dev)
976                 dev_put(dev);
977 
978 out:
979         return err;
980 }
981 
982 static struct proto packet_proto = {
983         .name     = "PACKET",
984         .owner    = THIS_MODULE,
985         .obj_size = sizeof(struct packet_sock),
986 };
987 
988 /*
989  *      Create a packet of type SOCK_PACKET. 
990  */
991 
992 static int packet_create(struct socket *sock, int protocol)
993 {
994         struct sock *sk;
995         struct packet_sock *po;
996         int err;
997 
998         if (!capable(CAP_NET_RAW))
999                 return -EPERM;
1000         if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
1001 #ifdef CONFIG_SOCK_PACKET
1002             && sock->type != SOCK_PACKET
1003 #endif
1004             )
1005                 return -ESOCKTNOSUPPORT;
1006 
1007         sock->state = SS_UNCONNECTED;
1008 
1009         err = -ENOBUFS;
1010         sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
1011         if (sk == NULL)
1012                 goto out;
1013 
1014         sock->ops = &packet_ops;
1015 #ifdef CONFIG_SOCK_PACKET
1016         if (sock->type == SOCK_PACKET)
1017                 sock->ops = &packet_ops_spkt;
1018 #endif
1019         sock_init_data(sock, sk);
1020 
1021         po = pkt_sk(sk);
1022         sk->sk_family = PF_PACKET;
1023         po->num = protocol;
1024 
1025         sk->sk_destruct = packet_sock_destruct;
1026         atomic_inc(&packet_socks_nr);
1027 
1028         /*
1029          *      Attach a protocol block
1030          */
1031 
1032         spin_lock_init(&po->bind_lock);
1033         po->prot_hook.func = packet_rcv;
1034 #ifdef CONFIG_SOCK_PACKET
1035         if (sock->type == SOCK_PACKET)
1036                 po->prot_hook.func = packet_rcv_spkt;
1037 #endif
1038         po->prot_hook.af_packet_priv = sk;
1039 
1040         if (protocol) {
1041                 po->prot_hook.type = protocol;
1042                 dev_add_pack(&po->prot_hook);
1043                 sock_hold(sk);
1044                 po->running = 1;
1045         }
1046 
1047         write_lock_bh(&packet_sklist_lock);
1048         sk_add_node(sk, &packet_sklist);
1049         write_unlock_bh(&packet_sklist_lock);
1050         return(0);
1051 out:
1052         return err;
1053 }
1054 
1055 /*
1056  *      Pull a packet from our receive queue and hand it to the user.
1057  *      If necessary we block.
1058  */
1059 
1060 static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1061                           struct msghdr *msg, size_t len, int flags)
1062 {
1063         struct sock *sk = sock->sk;
1064         struct sk_buff *skb;
1065         int copied, err;
1066         struct sockaddr_ll *sll;
1067 
1068         err = -EINVAL;
1069         if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1070                 goto out;
1071 
1072 #if 0
1073         /* What error should we return now? EUNATTACH? */
1074         if (pkt_sk(sk)->ifindex < 0)
1075                 return -ENODEV;
1076 #endif
1077 
1078         /*
1079          *      Call the generic datagram receiver. This handles all sorts
1080          *      of horrible races and re-entrancy so we can forget about it
1081          *      in the protocol layers.
1082          *
1083          *      Now it will return ENETDOWN, if device have just gone down,
1084          *      but then it will block.
1085          */
1086 
1087         skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1088 
1089         /*
1090          *      An error occurred so return it. Because skb_recv_datagram() 
1091          *      handles the blocking we don't see and worry about blocking
1092          *      retries.
1093          */
1094 
1095         if (skb == NULL)
1096                 goto out;
1097 
1098         /*
1099          *      If the address length field is there to be filled in, we fill
1100          *      it in now.
1101          */
1102 
1103         sll = (struct sockaddr_ll*)skb->cb;
1104         if (sock->type == SOCK_PACKET)
1105                 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1106         else
1107                 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1108 
1109         /*
1110          *      You lose any data beyond the buffer you gave. If it worries a
1111          *      user program they can ask the device for its MTU anyway.
1112          */
1113 
1114         copied = skb->len;
1115         if (copied > len)
1116         {
1117                 copied=len;
1118                 msg->msg_flags|=MSG_TRUNC;
1119         }
1120 
1121         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1122         if (err)
1123                 goto out_free;
1124 
1125         sock_recv_timestamp(msg, sk, skb);
1126 
1127         if (msg->msg_name)
1128                 memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
1129 
1130         /*
1131          *      Free or return the buffer as appropriate. Again this
1132          *      hides all the races and re-entrancy issues from us.
1133          */
1134         err = (flags&MSG_TRUNC) ? skb->len : copied;
1135 
1136 out_free:
1137         skb_free_datagram(sk, skb);
1138 out:
1139         return err;
1140 }
1141 
1142 #ifdef CONFIG_SOCK_PACKET
1143 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1144                                int *uaddr_len, int peer)
1145 {
1146         struct net_device *dev;
1147         struct sock *sk = sock->sk;
1148 
1149         if (peer)
1150                 return -EOPNOTSUPP;
1151 
1152         uaddr->sa_family = AF_PACKET;
1153         dev = dev_get_by_index(pkt_sk(sk)->ifindex);
1154         if (dev) {
1155                 strlcpy(uaddr->sa_data, dev->name, 15);
1156                 dev_put(dev);
1157         } else
1158                 memset(uaddr->sa_data, 0, 14);
1159         *uaddr_len = sizeof(*uaddr);
1160 
1161         return 0;
1162 }
1163 #endif
1164 
1165 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1166                           int *uaddr_len, int peer)
1167 {
1168         struct net_device *dev;
1169         struct sock *sk = sock->sk;
1170         struct packet_sock *po = pkt_sk(sk);
1171         struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
1172 
1173         if (peer)
1174                 return -EOPNOTSUPP;
1175 
1176         sll->sll_family = AF_PACKET;
1177         sll->sll_ifindex = po->ifindex;
1178         sll->sll_protocol = po->num;
1179         dev = dev_get_by_index(po->ifindex);
1180         if (dev) {
1181                 sll->sll_hatype = dev->type;
1182                 sll->sll_halen = dev->addr_len;
1183                 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1184                 dev_put(dev);
1185         } else {
1186                 sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
1187                 sll->sll_halen = 0;
1188         }
1189         *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
1190 
1191         return 0;
1192 }
1193 
1194 #ifdef CONFIG_PACKET_MULTICAST
1195 static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1196 {
1197         switch (i->type) {
1198         case PACKET_MR_MULTICAST:
1199                 if (what > 0)
1200                         dev_mc_add(dev, i->addr, i->alen, 0);
1201                 else
1202                         dev_mc_delete(dev, i->addr, i->alen, 0);
1203                 break;
1204         case PACKET_MR_PROMISC:
1205                 dev_set_promiscuity(dev, what);
1206                 break;
1207         case PACKET_MR_ALLMULTI:
1208                 dev_set_allmulti(dev, what);
1209                 break;
1210         default:;
1211         }
1212 }
1213 
1214 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1215 {
1216         for ( ; i; i=i->next) {
1217                 if (i->ifindex == dev->ifindex)
1218                         packet_dev_mc(dev, i, what);
1219         }
1220 }
1221 
1222 static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1223 {
1224         struct packet_sock *po = pkt_sk(sk);
1225         struct packet_mclist *ml, *i;
1226         struct net_device *dev;
1227         int err;
1228 
1229         rtnl_lock();
1230 
1231         err = -ENODEV;
1232         dev = __dev_get_by_index(mreq->mr_ifindex);
1233         if (!dev)
1234                 goto done;
1235 
1236         err = -EINVAL;
1237         if (mreq->mr_alen > dev->addr_len)
1238                 goto done;
1239 
1240         err = -ENOBUFS;
1241         i = kmalloc(sizeof(*i), GFP_KERNEL);
1242         if (i == NULL)
1243                 goto done;
1244 
1245         err = 0;
1246         for (ml = po->mclist; ml; ml = ml->next) {
1247                 if (ml->ifindex == mreq->mr_ifindex &&
1248                     ml->type == mreq->mr_type &&
1249                     ml->alen == mreq->mr_alen &&
1250                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1251                         ml->count++;
1252                         /* Free the new element ... */
1253                         kfree(i);
1254                         goto done;
1255                 }
1256         }
1257 
1258         i->type = mreq->mr_type;
1259         i->ifindex = mreq->mr_ifindex;
1260         i->alen = mreq->mr_alen;
1261         memcpy(i->addr, mreq->mr_address, i->alen);
1262         i->count = 1;
1263         i->next = po->mclist;
1264         po->mclist = i;
1265         packet_dev_mc(dev, i, +1);
1266 
1267 done:
1268         rtnl_unlock();
1269         return err;
1270 }
1271 
1272 static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
1273 {
1274         struct packet_mclist *ml, **mlp;
1275 
1276         rtnl_lock();
1277 
1278         for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1279                 if (ml->ifindex == mreq->mr_ifindex &&
1280                     ml->type == mreq->mr_type &&
1281                     ml->alen == mreq->mr_alen &&
1282                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1283                         if (--ml->count == 0) {
1284                                 struct net_device *dev;
1285                                 *mlp = ml->next;
1286                                 dev = dev_get_by_index(ml->ifindex);
1287                                 if (dev) {
1288                                         packet_dev_mc(dev, ml, -1);
1289                                         dev_put(dev);
1290                                 }
1291                                 kfree(ml);
1292                         }
1293                         rtnl_unlock();
1294                         return 0;
1295                 }
1296         }
1297         rtnl_unlock();
1298         return -EADDRNOTAVAIL;
1299 }
1300 
1301 static void packet_flush_mclist(struct sock *sk)
1302 {
1303         struct packet_sock *po = pkt_sk(sk);
1304         struct packet_mclist *ml;
1305 
1306         if (!po->mclist)
1307                 return;
1308 
1309         rtnl_lock();
1310         while ((ml = po->mclist) != NULL) {
1311                 struct net_device *dev;
1312 
1313                 po->mclist = ml->next;
1314                 if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
1315                         packet_dev_mc(dev, ml, -1);
1316                         dev_put(dev);
1317                 }
1318                 kfree(ml);
1319         }
1320         rtnl_unlock();
1321 }
1322 #endif
1323 
1324 static int
1325 packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
1326 {
1327         struct sock *sk = sock->sk;
1328         int ret;
1329 
1330         if (level != SOL_PACKET)
1331                 return -ENOPROTOOPT;
1332 
1333         switch(optname) {
1334 #ifdef CONFIG_PACKET_MULTICAST
1335         case PACKET_ADD_MEMBERSHIP:     
1336         case PACKET_DROP_MEMBERSHIP:
1337         {
1338                 struct packet_mreq_max mreq;
1339                 int len = optlen;
1340                 memset(&mreq, 0, sizeof(mreq));
1341                 if (len < sizeof(struct packet_mreq))
1342                         return -EINVAL;
1343                 if (len > sizeof(mreq))
1344                         len = sizeof(mreq);
1345                 if (copy_from_user(&mreq,optval,len))
1346                         return -EFAULT;
1347                 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1348                         return -EINVAL;
1349                 if (optname == PACKET_ADD_MEMBERSHIP)
1350                         ret = packet_mc_add(sk, &mreq);
1351                 else
1352                         ret = packet_mc_drop(sk, &mreq);
1353                 return ret;
1354         }
1355 #endif
1356 #ifdef CONFIG_PACKET_MMAP
1357         case PACKET_RX_RING:
1358         {
1359                 struct tpacket_req req;
1360 
1361                 if (optlen<sizeof(req))
1362                         return -EINVAL;
1363                 if (copy_from_user(&req,optval,sizeof(req)))
1364                         return -EFAULT;
1365                 return packet_set_ring(sk, &req, 0);
1366         }
1367         case PACKET_COPY_THRESH:
1368         {
1369                 int val;
1370 
1371                 if (optlen!=sizeof(val))
1372                         return -EINVAL;
1373                 if (copy_from_user(&val,optval,sizeof(val)))
1374                         return -EFAULT;
1375 
1376                 pkt_sk(sk)->copy_thresh = val;
1377                 return 0;
1378         }
1379 #endif
1380         default:
1381                 return -ENOPROTOOPT;
1382         }
1383 }
1384 
1385 static int packet_getsockopt(struct socket *sock, int level, int optname,
1386                              char __user *optval, int __user *optlen)
1387 {
1388         int len;
1389         struct sock *sk = sock->sk;
1390         struct packet_sock *po = pkt_sk(sk);
1391 
1392         if (level != SOL_PACKET)
1393                 return -ENOPROTOOPT;
1394 
1395         if (get_user(len, optlen))
1396                 return -EFAULT;
1397 
1398         if (len < 0)
1399                 return -EINVAL;
1400                 
1401         switch(optname) {
1402         case PACKET_STATISTICS:
1403         {
1404                 struct tpacket_stats st;
1405 
1406                 if (len > sizeof(struct tpacket_stats))
1407                         len = sizeof(struct tpacket_stats);
1408                 spin_lock_bh(&sk->sk_receive_queue.lock);
1409                 st = po->stats;
1410                 memset(&po->stats, 0, sizeof(st));
1411                 spin_unlock_bh(&sk->sk_receive_queue.lock);
1412                 st.tp_packets += st.tp_drops;
1413 
1414                 if (copy_to_user(optval, &st, len))
1415                         return -EFAULT;
1416                 break;
1417         }
1418         default:
1419                 return -ENOPROTOOPT;
1420         }
1421 
1422         if (put_user(len, optlen))
1423                 return -EFAULT;
1424         return 0;
1425 }
1426 
1427 
1428 static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1429 {
1430         struct sock *sk;
1431         struct hlist_node *node;
1432         struct net_device *dev = (struct net_device*)data;
1433 
1434         read_lock(&packet_sklist_lock);
1435         sk_for_each(sk, node, &packet_sklist) {
1436                 struct packet_sock *po = pkt_sk(sk);
1437 
1438                 switch (msg) {
1439                 case NETDEV_UNREGISTER:
1440 #ifdef CONFIG_PACKET_MULTICAST
1441                         if (po->mclist)
1442                                 packet_dev_mclist(dev, po->mclist, -1);
1443                         // fallthrough
1444 #endif
1445                 case NETDEV_DOWN:
1446                         if (dev->ifindex == po->ifindex) {
1447                                 spin_lock(&po->bind_lock);
1448                                 if (po->running) {
1449                                         __dev_remove_pack(&po->prot_hook);
1450                                         __sock_put(sk);
1451                                         po->running = 0;
1452                                         sk->sk_err = ENETDOWN;
1453                                         if (!sock_flag(sk, SOCK_DEAD))
1454                                                 sk->sk_error_report(sk);
1455                                 }
1456                                 if (msg == NETDEV_UNREGISTER) {
1457                                         po->ifindex = -1;
1458                                         po->prot_hook.dev = NULL;
1459                                 }
1460                                 spin_unlock(&po->bind_lock);
1461                         }
1462                         break;
1463                 case NETDEV_UP:
1464                         spin_lock(&po->bind_lock);
1465                         if (dev->ifindex == po->ifindex && po->num &&
1466                             !po->running) {
1467                                 dev_add_pack(&po->prot_hook);
1468                                 sock_hold(sk);
1469                                 po->running = 1;
1470                         }
1471                         spin_unlock(&po->bind_lock);
1472                         break;
1473                 }
1474         }
1475         read_unlock(&packet_sklist_lock);
1476         return NOTIFY_DONE;
1477 }
1478 
1479 
1480 static int packet_ioctl(struct socket *sock, unsigned int cmd,
1481                         unsigned long arg)
1482 {
1483         struct sock *sk = sock->sk;
1484 
1485         switch(cmd) {
1486                 case SIOCOUTQ:
1487                 {
1488                         int amount = atomic_read(&sk->sk_wmem_alloc);
1489                         return put_user(amount, (int __user *)arg);
1490                 }
1491                 case SIOCINQ:
1492                 {
1493                         struct sk_buff *skb;
1494                         int amount = 0;
1495 
1496                         spin_lock_bh(&sk->sk_receive_queue.lock);
1497                         skb = skb_peek(&sk->sk_receive_queue);
1498                         if (skb)
1499                                 amount = skb->len;
1500                         spin_unlock_bh(&sk->sk_receive_queue.lock);
1501                         return put_user(amount, (int __user *)arg);
1502                 }
1503                 case SIOCGSTAMP:
1504                         return sock_get_timestamp(sk, (struct timeval __user *)arg);
1505                         
1506 #ifdef CONFIG_INET
1507                 case SIOCADDRT:
1508                 case SIOCDELRT:
1509                 case SIOCDARP:
1510                 case SIOCGARP:
1511                 case SIOCSARP:
1512                 case SIOCGIFADDR:
1513                 case SIOCSIFADDR:
1514                 case SIOCGIFBRDADDR:
1515                 case SIOCSIFBRDADDR:
1516                 case SIOCGIFNETMASK:
1517                 case SIOCSIFNETMASK:
1518                 case SIOCGIFDSTADDR:
1519                 case SIOCSIFDSTADDR:
1520                 case SIOCSIFFLAGS:
1521                         return inet_dgram_ops.ioctl(sock, cmd, arg);
1522 #endif
1523 
1524                 default:
1525                         return -ENOIOCTLCMD;
1526         }
1527         return 0;
1528 }
1529 
1530 #ifndef CONFIG_PACKET_MMAP
1531 #define packet_mmap sock_no_mmap
1532 #define packet_poll datagram_poll
1533 #else
1534 
1535 static unsigned int packet_poll(struct file * file, struct socket *sock,
1536                                 poll_table *wait)
1537 {
1538         struct sock *sk = sock->sk;
1539         struct packet_sock *po = pkt_sk(sk);
1540         unsigned int mask = datagram_poll(file, sock, wait);
1541 
1542         spin_lock_bh(&sk->sk_receive_queue.lock);
1543         if (po->pg_vec) {
1544                 unsigned last = po->head ? po->head-1 : po->frame_max;
1545                 struct tpacket_hdr *h;
1546 
1547                 h = (struct tpacket_hdr *)packet_lookup_frame(po, last);
1548 
1549                 if (h->tp_status)
1550                         mask |= POLLIN | POLLRDNORM;
1551         }
1552         spin_unlock_bh(&sk->sk_receive_queue.lock);
1553         return mask;
1554 }
1555 
1556 
1557 /* Dirty? Well, I still did not learn better way to account
1558  * for user mmaps.
1559  */
1560 
1561 static void packet_mm_open(struct vm_area_struct *vma)
1562 {
1563         struct file *file = vma->vm_file;
1564         struct socket * sock = file->private_data;
1565         struct sock *sk = sock->sk;
1566         
1567         if (sk)
1568                 atomic_inc(&pkt_sk(sk)->mapped);
1569 }
1570 
1571 static void packet_mm_close(struct vm_area_struct *vma)
1572 {
1573         struct file *file = vma->vm_file;
1574         struct socket * sock = file->private_data;
1575         struct sock *sk = sock->sk;
1576         
1577         if (sk)
1578                 atomic_dec(&pkt_sk(sk)->mapped);
1579 }
1580 
1581 static struct vm_operations_struct packet_mmap_ops = {
1582         .open = packet_mm_open,
1583         .close =packet_mm_close,
1584 };
1585 
1586 static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
1587 {
1588         return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
1589 }
1590 
/*
 * Free a block vector: every non-NULL block of the given page order,
 * then the vector itself.  NULL slots (from a partially filled vector)
 * are skipped.
 */
static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
        int i = 0;

        while (i < len) {
                if (likely(pg_vec[i]))
                        free_pages((unsigned long) pg_vec[i], order);
                i++;
        }
        kfree(pg_vec);
}
1601 
1602 static inline char *alloc_one_pg_vec_page(unsigned long order)
1603 {
1604         return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
1605                                          order);
1606 }
1607 
1608 static char **alloc_pg_vec(struct tpacket_req *req, int order)
1609 {
1610         unsigned int block_nr = req->tp_block_nr;
1611         char **pg_vec;
1612         int i;
1613 
1614         pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
1615         if (unlikely(!pg_vec))
1616                 goto out;
1617 
1618         for (i = 0; i < block_nr; i++) {
1619                 pg_vec[i] = alloc_one_pg_vec_page(order);
1620                 if (unlikely(!pg_vec[i]))
1621                         goto out_free_pgvec;
1622         }
1623 
1624 out:
1625         return pg_vec;
1626 
1627 out_free_pgvec:
1628         free_pg_vec(pg_vec, order, block_nr);
1629         pg_vec = NULL;
1630         goto out;
1631 }
1632 
1633 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1634 {
1635         char **pg_vec = NULL;
1636         struct packet_sock *po = pkt_sk(sk);
1637         int was_running, num, order = 0;
1638         int err = 0;
1639         
1640         if (req->tp_block_nr) {
1641                 int i, l;
1642 
1643                 /* Sanity tests and some calculations */
1644 
1645                 if (unlikely(po->pg_vec))
1646                         return -EBUSY;
1647 
1648                 if (unlikely((int)req->tp_block_size <= 0))
1649                         return -EINVAL;
1650                 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
1651                         return -EINVAL;
1652                 if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
1653                         return -EINVAL;
1654                 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
1655                         return -EINVAL;
1656 
1657                 po->frames_per_block = req->tp_block_size/req->tp_frame_size;
1658                 if (unlikely(po->frames_per_block <= 0))
1659                         return -EINVAL;
1660                 if (unlikely((po->frames_per_block * req->tp_block_nr) !=
1661                              req->tp_frame_nr))
1662                         return -EINVAL;
1663 
1664                 err = -ENOMEM;
1665                 order = get_order(req->tp_block_size);
1666                 pg_vec = alloc_pg_vec(req, order);
1667                 if (unlikely(!pg_vec))
1668                         goto out;
1669 
1670                 l = 0;
1671                 for (i = 0; i < req->tp_block_nr; i++) {
1672                         char *ptr = pg_vec[i];
1673                         struct tpacket_hdr *header;
1674                         int k;
1675 
1676                         for (k = 0; k < po->frames_per_block; k++) {
1677                                 header = (struct tpacket_hdr *) ptr;
1678                                 header->tp_status = TP_STATUS_KERNEL;
1679                                 ptr += req->tp_frame_size;
1680                         }
1681                 }
1682                 /* Done */
1683         } else {
1684                 if (unlikely(req->tp_frame_nr))
1685                         return -EINVAL;
1686         }
1687 
1688         lock_sock(sk);
1689 
1690         /* Detach socket from network */
1691         spin_lock(&po->bind_lock);
1692         was_running = po->running;
1693         num = po->num;
1694         if (was_running) {
1695                 __dev_remove_pack(&po->prot_hook);
1696                 po->num = 0;
1697                 po->running = 0;
1698                 __sock_put(sk);
1699         }
1700         spin_unlock(&po->bind_lock);
1701                 
1702         synchronize_net();
1703 
1704         err = -EBUSY;
1705         if (closing || atomic_read(&po->mapped) == 0) {
1706                 err = 0;
1707 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1708 
1709                 spin_lock_bh(&sk->sk_receive_queue.lock);
1710                 pg_vec = XC(po->pg_vec, pg_vec);
1711                 po->frame_max = (req->tp_frame_nr - 1);
1712                 po->head = 0;
1713                 po->frame_size = req->tp_frame_size;
1714                 spin_unlock_bh(&sk->sk_receive_queue.lock);
1715 
1716                 order = XC(po->pg_vec_order, order);
1717                 req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1718 
1719                 po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
1720                 po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
1721                 skb_queue_purge(&sk->sk_receive_queue);
1722 #undef XC
1723                 if (atomic_read(&po->mapped))
1724                         printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1725         }
1726 
1727         spin_lock(&po->bind_lock);
1728         if (was_running && !po->running) {
1729                 sock_hold(sk);
1730                 po->running = 1;
1731                 po->num = num;
1732                 dev_add_pack(&po->prot_hook);
1733         }
1734         spin_unlock(&po->bind_lock);
1735 
1736         release_sock(sk);
1737 
1738         if (pg_vec)
1739                 free_pg_vec(pg_vec, order, req->tp_block_nr);
1740 out:
1741         return err;
1742 }
1743 
1744 static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1745 {
1746         struct sock *sk = sock->sk;
1747         struct packet_sock *po = pkt_sk(sk);
1748         unsigned long size;
1749         unsigned long start;
1750         int err = -EINVAL;
1751         int i;
1752 
1753         if (vma->vm_pgoff)
1754                 return -EINVAL;
1755 
1756         size = vma->vm_end - vma->vm_start;
1757 
1758         lock_sock(sk);
1759         if (po->pg_vec == NULL)
1760                 goto out;
1761         if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
1762                 goto out;
1763 
1764         start = vma->vm_start;
1765         for (i = 0; i < po->pg_vec_len; i++) {
1766                 struct page *page = virt_to_page(po->pg_vec[i]);
1767                 int pg_num;
1768 
1769                 for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
1770                         err = vm_insert_page(vma, start, page);
1771                         if (unlikely(err))
1772                                 goto out;
1773                         start += PAGE_SIZE;
1774                 }
1775         }
1776         atomic_inc(&po->mapped);
1777         vma->vm_ops = &packet_mmap_ops;
1778         err = 0;
1779 
1780 out:
1781         release_sock(sk);
1782         return err;
1783 }
1784 #endif
1785 
1786 
1787 #ifdef CONFIG_SOCK_PACKET
1788 static const struct proto_ops packet_ops_spkt = {
1789         .family =       PF_PACKET,
1790         .owner =        THIS_MODULE,
1791         .release =      packet_release,
1792         .bind =         packet_bind_spkt,
1793         .connect =      sock_no_connect,
1794         .socketpair =   sock_no_socketpair,
1795         .accept =       sock_no_accept,
1796         .getname =      packet_getname_spkt,
1797         .poll =         datagram_poll,
1798         .ioctl =        packet_ioctl,
1799         .listen =       sock_no_listen,
1800         .shutdown =     sock_no_shutdown,
1801         .setsockopt =   sock_no_setsockopt,
1802         .getsockopt =   sock_no_getsockopt,
1803         .sendmsg =      packet_sendmsg_spkt,
1804         .recvmsg =      packet_recvmsg,
1805         .mmap =         sock_no_mmap,
1806         .sendpage =     sock_no_sendpage,
1807 };
1808 #endif
1809 
1810 static const struct proto_ops packet_ops = {
1811         .family =       PF_PACKET,
1812         .owner =        THIS_MODULE,
1813         .release =      packet_release,
1814         .bind =         packet_bind,
1815         .connect =      sock_no_connect,
1816         .socketpair =   sock_no_socketpair,
1817         .accept =       sock_no_accept,
1818         .getname =      packet_getname, 
1819         .poll =         packet_poll,
1820         .ioctl =        packet_ioctl,
1821         .listen =       sock_no_listen,
1822         .shutdown =     sock_no_shutdown,
1823         .setsockopt =   packet_setsockopt,
1824         .getsockopt =   packet_getsockopt,
1825         .sendmsg =      packet_sendmsg,
1826         .recvmsg =      packet_recvmsg,
1827         .mmap =         packet_mmap,
1828         .sendpage =     sock_no_sendpage,
1829 };
1830 
1831 static struct net_proto_family packet_family_ops = {
1832         .family =       PF_PACKET,
1833         .create =       packet_create,
1834         .owner  =       THIS_MODULE,
1835 };
1836 
1837 static struct notifier_block packet_netdev_notifier = {
1838         .notifier_call =packet_notifier,
1839 };
1840 
1841 #ifdef CONFIG_PROC_FS
1842 static inline struct sock *packet_seq_idx(loff_t off)
1843 {
1844         struct sock *s;
1845         struct hlist_node *node;
1846 
1847         sk_for_each(s, node, &packet_sklist) {
1848                 if (!off--)
1849                         return s;
1850         }
1851         return NULL;
1852 }
1853 
1854 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
1855 {
1856         read_lock(&packet_sklist_lock);
1857         return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
1858 }
1859 
1860 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1861 {
1862         ++*pos;
1863         return  (v == SEQ_START_TOKEN) 
1864                 ? sk_head(&packet_sklist) 
1865                 : sk_next((struct sock*)v) ;
1866 }
1867 
1868 static void packet_seq_stop(struct seq_file *seq, void *v)
1869 {
1870         read_unlock(&packet_sklist_lock);               
1871 }
1872 
1873 static int packet_seq_show(struct seq_file *seq, void *v) 
1874 {
1875         if (v == SEQ_START_TOKEN)
1876                 seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
1877         else {
1878                 struct sock *s = v;
1879                 const struct packet_sock *po = pkt_sk(s);
1880 
1881                 seq_printf(seq,
1882                            "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
1883                            s,
1884                            atomic_read(&s->sk_refcnt),
1885                            s->sk_type,
1886                            ntohs(po->num),
1887                            po->ifindex,
1888                            po->running,
1889                            atomic_read(&s->sk_rmem_alloc),
1890                            sock_i_uid(s),
1891                            sock_i_ino(s) );
1892         }
1893 
1894         return 0;
1895 }
1896 
1897 static struct seq_operations packet_seq_ops = {
1898         .start  = packet_seq_start,
1899         .next   = packet_seq_next,
1900         .stop   = packet_seq_stop,
1901         .show   = packet_seq_show,
1902 };
1903 
1904 static int packet_seq_open(struct inode *inode, struct file *file)
1905 {
1906         return seq_open(file, &packet_seq_ops);
1907 }
1908 
1909 static struct file_operations packet_seq_fops = {
1910         .owner          = THIS_MODULE,
1911         .open           = packet_seq_open,
1912         .read           = seq_read,
1913         .llseek         = seq_lseek,
1914         .release        = seq_release,
1915 };
1916 
1917 #endif
1918 
1919 static void __exit packet_exit(void)
1920 {
1921         proc_net_remove("packet");
1922         unregister_netdevice_notifier(&packet_netdev_notifier);
1923         sock_unregister(PF_PACKET);
1924         proto_unregister(&packet_proto);
1925 }
1926 
1927 static int __init packet_init(void)
1928 {
1929         int rc = proto_register(&packet_proto, 0);
1930 
1931         if (rc != 0)
1932                 goto out;
1933 
1934         sock_register(&packet_family_ops);
1935         register_netdevice_notifier(&packet_netdev_notifier);
1936         proc_net_fops_create("packet", 0, &packet_seq_fops);
1937 out:
1938         return rc;
1939 }
1940 
1941 module_init(packet_init);
1942 module_exit(packet_exit);
1943 MODULE_LICENSE("GPL");
1944 MODULE_ALIAS_NETPROTO(PF_PACKET);
1945 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.