背景
最近在排查一个网络问题,ifconfig eth0 up 后,网卡link up比较慢。因此,分析了下从ifconfig up 到网络驱动的调用流程。这里顺便作个记录。
ifconfig eth0 up 调用的是busybox 的命令,因此从busybox 源码入手,逐步分析下调用流程。相关代码位于:networking/ifenslave.c
ifconfig eth0 up
ifconfig eth0 up 和 ifconfig eth0 down 分别对应busybox 的set_if_up()和set_if_down().
staticintset_if_down(char*ifname,intflags) { intres=set_if_flags(ifname,flags&~IFF_UP); if(res) bb_perror_msg("%s:can'tdown",ifname); returnres; }
staticintset_if_up(char*ifname,intflags) { intres=set_if_flags(ifname,flags|IFF_UP); if(res) bb_perror_msg("%s:can'tup",ifname); returnres; }
比如,当我们敲ifconfig eth0 down时,实则就是调用:
/* Example call for "ifconfig eth0 down"; the flags argument is read from master_flags.ifr_flags. */
set_if_down("eth0",master_flags.ifr_flags);
set_if_flags()会将网卡名,up / down 标志位flags通过ioctl命令SIOCSIFFLAGS 传递给内核网卡驱动。
staticintset_if_flags(char*ifname,intflags) { structifreqifr; ifr.ifr_flags=flags; returnset_ifrname_and_do_ioctl(SIOCSIFFLAGS,&ifr,ifname); }
dev_ifsioc
接着深入到内核代码中,看下SIOCSIFFLAGS命令在哪里实现。代码位于kernel/net/core/dev_ioctl.c。
staticintdev_ifsioc(structnet*net,structifreq*ifr,unsignedintcmd) { interr; structnet_device*dev=__dev_get_by_name(net,ifr->ifr_name); conststructnet_device_ops*ops; if(!dev) return-ENODEV; ops=dev->netdev_ops; switch(cmd){ caseSIOCSIFFLAGS:/*Setinterfaceflags*/ returndev_change_flags(dev,ifr->ifr_flags); caseSIOCSIFMETRIC:/*Setthemetricontheinterface (currentlyunused)*/ return-EOPNOTSUPP; ................... } returnerr; }
dev_ifsioc()会调用__dev_get_by_name()根据 网卡名遍历 net链表,如果匹配到则返回net_device结构体指针。接着,SIOCSIFFLAGS会调用到dev_change_flags(),最后调用到__dev_change_flags()。
dev_change_flags
intdev_change_flags(structnet_device*dev,unsignedintflags) { intret; unsignedintchanges,old_flags=dev->flags,old_gflags=dev->gflags; ret=__dev_change_flags(dev,flags); if(ret< 0) return ret; changes = (old_flags ^ dev->flags)|(old_gflags^dev->gflags); __dev_notify_flags(dev,old_flags,changes); returnret; }
int__dev_change_flags(structnet_device*dev,unsignedintflags) { unsignedintold_flags=dev->flags; intret; ASSERT_RTNL(); /* *Settheflagsonourdevice. */ dev->flags=(flags&(IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP| IFF_DYNAMIC|IFF_MULTICAST|IFF_PORTSEL| IFF_AUTOMEDIA))| (dev->flags&(IFF_UP|IFF_VOLATILE|IFF_PROMISC| IFF_ALLMULTI)); /* *Loadinthecorrectmulticastlistnowtheflagshavechanged. */ if((old_flags^flags)&IFF_MULTICAST) dev_change_rx_flags(dev,IFF_MULTICAST); dev_set_rx_mode(dev); /* *Havewedownedtheinterface.WehandleIFF_UPourselves *accordingtouserattemptstosetit,ratherthanblindly *settingit. */ ret=0; /*两个标识有一个是IFF_UP*/ if((old_flags^flags)&IFF_UP) ret=((old_flags&IFF_UP)?__dev_close:__dev_open)(dev);//通过flags判断调用__dev_close还是__dev_open if((flags^dev->gflags)&IFF_PROMISC){ intinc=(flags&IFF_PROMISC)?1:-1; unsignedintold_flags=dev->flags; dev->gflags^=IFF_PROMISC; if(__dev_set_promiscuity(dev,inc,false)>=0) if(dev->flags!=old_flags) dev_set_rx_mode(dev); } /*NOTE:orderofsynchronizationofIFF_PROMISCandIFF_ALLMULTI isimportant.Some(broken)driverssetIFF_PROMISC,when IFF_ALLMULTIisrequestednotaskingusandnotreporting. */ if((flags^dev->gflags)&IFF_ALLMULTI){ intinc=(flags&IFF_ALLMULTI)?1:-1; dev->gflags^=IFF_ALLMULTI; __dev_set_allmulti(dev,inc,false); } returnret; }
在__dev_change_flags(dev, flags)函数中,通过判断flag的IFF_UP位上的值是否相反,来实现是调用__dev_close()还是__dev_open()来开关eth0。
__dev_close
__dev_close中会将当前的net_device加入到等待设备关闭列表中。
staticint__dev_close(structnet_device*dev) { intretval; LIST_HEAD(single); list_add(&dev->close_list,&single); retval=__dev_close_many(&single); list_del(&single); returnretval; }
__dev_close_many
__dev_close_many通知设备正在关闭,等待未发送完的数据发送完,最后清除开启标记。
staticint__dev_close_many(structlist_head*head) { structnet_device*dev; ASSERT_RTNL(); might_sleep(); list_for_each_entry(dev,head,close_list){ /*Temporarilydisablenetpolluntiltheinterfaceisdown*/ /*禁用netpoll*/ netpoll_poll_disable(dev); /*通知设备正在关闭*/ call_netdevice_notifiers(NETDEV_GOING_DOWN,dev); /*清除start标志位*/ clear_bit(__LINK_STATE_START,&dev->state); /*Synchronizetoscheduledpoll.Wecannottouchpolllist,it *canbeevenondifferentcpu.Sojustclearnetif_running(). * *dev->stop()willinvokenapi_disable()onallofit's *napi_structinstancesonthisdevice. */ smp_mb__after_atomic();/*Commitnetif_running().*/ } /*未发送完的数据发送完*/ dev_deactivate_many(head); list_for_each_entry(dev,head,close_list){ conststructnet_device_ops*ops=dev->netdev_ops; /* *Callthedevicespecificclose.Thiscannotfail. *OnlyifdeviceisUP * *WeallowittobecalledevenafteraDETACHhot-plug *event. */ /*调用设备关闭操作*/ if(ops->ndo_stop) ops->ndo_stop(dev); /*标记设备关闭*/ dev->flags&=~IFF_UP; /*启用netpoll*/ netpoll_poll_enable(dev); } return0; }
ndo_stop
ndo_stop为关闭网卡时,不同网卡驱动注册的不同的关闭函数,我们以海思的网卡驱动为例,分析下ndo_stop函数的实现。代码位于kernel/drivers/net/ethernet/hisilicon/hns/hns_enet.c。
hns_nic_net_stop
/*
 * Stop entry point for the hns NIC: delegates to hns_nic_net_down() and
 * always returns 0.
 */
static int hns_nic_net_stop(struct net_device *ndev)
{
	hns_nic_net_down(ndev);

	return 0;
}
hns_nic_net_down
staticvoidhns_nic_net_down(structnet_device*ndev) { inti; structhnae_ae_ops*ops; structhns_nic_priv*priv=netdev_priv(ndev); if(test_and_set_bit(NIC_STATE_DOWN,&priv->state)) return; (void)del_timer_sync(&priv->service_timer); netif_tx_stop_all_queues(ndev); netif_carrier_off(ndev); netif_tx_disable(ndev); priv->link=0; if(priv->phy) phy_stop(priv->phy); ops=priv->ae_handle->dev->ops; if(ops->stop) ops->stop(priv->ae_handle); netif_tx_stop_all_queues(ndev); for(i=priv->ae_handle->q_num-1;i>=0;i--){ hns_nic_ring_close(ndev,i); hns_nic_ring_close(ndev,i+priv->ae_handle->q_num); /*cleantxbuffers*/ hns_nic_tx_clr_all_bufs(priv->ring_data+i); } }
hns_nic_net_down()中会调用netif_carrier_off()通知内核子系统网络断开。下面我们详细分析下netif_carrier_off()的实现。
netif_carrier_off()
voidnetif_carrier_off(structnet_device*dev) { /*设置网卡为载波断开状态即nocarrier状态,上行时软中断下半部读到该状态不会进行网卡收包*/ if(!test_and_set_bit(__LINK_STATE_NOCARRIER,&dev->state)){ if(dev->reg_state==NETREG_UNINITIALIZED) return; /*增加设备改变状态*/ atomic_inc(&dev->carrier_changes); /*加入事件处理队列进行处理*/ linkwatch_fire_event(dev); } }
linkwatch_fire_event()
linkwatch_fire_event()函数将设备加入到事件队列,并且进行事件调度,调度中会根据是否为紧急事件做不同处理。
voidlinkwatch_fire_event(structnet_device*dev) { /*判断是否是紧急处理的事件*/ boolurgent=linkwatch_urgent_event(dev); /*判断是否是紧急处理的事件*/ if(!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING,&dev->state)){ /*添加事件到事件列表*/ linkwatch_add_event(dev); }elseif(!urgent) /*设备以前已经设置了pending标记,不是紧急事件,直接返回*/ return; /*事件调度*/ linkwatch_schedule_work(urgent); }
linkwatch_urgent_event()
linkwatch_urgent_event()判断事件是否需要紧急处理。
staticboollinkwatch_urgent_event(structnet_device*dev) { /*设备未运行,非紧急*/ if(!netif_running(dev)) returnfalse; /*设备的索引号与连接索引号不等,紧急*/ if(dev->ifindex!=dev_get_iflink(dev)) returntrue; /*设备作为teamport,紧急*/ if(dev->priv_flags&IFF_TEAM_PORT) returntrue; /*连接与否&&发送队列排队规则改变与否*/ returnnetif_carrier_ok(dev)&&qdisc_tx_changing(dev); }
linkwatch_add_event()
linkwatch_add_event()将设备加入到事件处理链表。
staticvoidlinkwatch_add_event(structnet_device*dev) { unsignedlongflags; spin_lock_irqsave(&lweventlist_lock,flags); /*若未添加,则添加设备到事件列表*/ if(list_empty(&dev->link_watch_list)){ list_add_tail(&dev->link_watch_list,&lweventlist); dev_hold(dev); } spin_unlock_irqrestore(&lweventlist_lock,flags); }
linkwatch_schedule_work()
linkwatch_schedule_work()对事件处理进行调度,紧急事件立即执行,非紧急事件延后执行。
staticvoidlinkwatch_schedule_work(inturgent) { unsignedlongdelay=linkwatch_nextevent-jiffies; /*已经设置了紧急标记,则返回*/ if(test_bit(LW_URGENT,&linkwatch_flags)) return; /*需要紧急调度*/ if(urgent){ /*之前设置了,则返回*/ if(test_and_set_bit(LW_URGENT,&linkwatch_flags)) return; /*未设置紧急,则立即执行*/ delay=0; } /*如果大于1s则立即执行*/ if(delay>HZ) delay=0; /*如果设置了紧急标记,则立即执行*/ if(test_bit(LW_URGENT,&linkwatch_flags)) mod_delayed_work(system_wq,&linkwatch_work,0); else /*未设置紧急标记,则按照delay执行*/ schedule_delayed_work(&linkwatch_work,delay); }
__linkwatch_run_queue()
__linkwatch_run_queue()完成对事件调度队列中设备的处理。
staticvoid__linkwatch_run_queue(inturgent_only) { structnet_device*dev; LIST_HEAD(wrk); /* *Limitthenumberoflinkwatcheventstoone *persecondsothatarunawaydriverdoesnot *causeastormofmessagesonthenetlink *socket.Thislimitdoesnotapplytoupevents *whilethedeviceqdiscisdown. */ /*已达到调度时间*/ if(!urgent_only) linkwatch_nextevent=jiffies+HZ; /*Limitwrap-aroundeffectondelay.*/ /* 未到达调度时间,并且下一次调度在当前时间的1s以后 那么设置调度时间是当前时间 */ elseif(time_after(linkwatch_nextevent,jiffies+HZ)) linkwatch_nextevent=jiffies; /*清除紧急标识*/ clear_bit(LW_URGENT,&linkwatch_flags); spin_lock_irq(&lweventlist_lock); list_splice_init(&lweventlist,&wrk); /*遍历链表*/ while(!list_empty(&wrk)){ /*获取设备*/ dev=list_first_entry(&wrk,structnet_device,link_watch_list); /*从链表移除设备*/ list_del_init(&dev->link_watch_list); /*未到达调度时间&&不需要紧急处理*/ if(urgent_only&&!linkwatch_urgent_event(dev)){ /*添加到链表尾部*/ list_add_tail(&dev->link_watch_list,&lweventlist); /*继续处理*/ continue; } spin_unlock_irq(&lweventlist_lock); /*处理设备*/ linkwatch_do_dev(dev); spin_lock_irq(&lweventlist_lock); } /*链表有未处理事件,则以非紧急状态调度队列*/ if(!list_empty(&lweventlist)) linkwatch_schedule_work(0); spin_unlock_irq(&lweventlist_lock); }
linkwatch_do_dev()
linkwatch_do_dev()完成对某个设备的状态改变处理。
staticvoidlinkwatch_do_dev(structnet_device*dev) { /* *Makesuretheabovereadiscompletesinceitcanbe *rewrittenassoonasweclearthebitbelow. */ smp_mb__before_atomic(); /*Weareabouttohandlethisdevice, *soneweventscanbeaccepted */ /*清除pending标记*/ clear_bit(__LINK_STATE_LINKWATCH_PENDING,&dev->state); rfc2863_policy(dev); /*如果设备启动状态*/ if(dev->flags&IFF_UP){ /*链路连接*/ if(netif_carrier_ok(dev)) /*启用排队规则*/ dev_activate(dev); else /*关闭排队规则*/ dev_deactivate(dev); /*设备状态改变处理,执行netdev_chain上设备状态变更回调*/ netdev_state_change(dev); } dev_put(dev); }
phy_stop()
最后,hns_nic_net_down()中会调用phy_stop()将网卡link down。
voidphy_stop(structphy_device*phydev) { mutex_lock(&phydev->lock); if(PHY_HALTED==phydev->state) gotoout_unlock; if(phy_interrupt_is_valid(phydev)){ /*DisablePHYInterrupts*/ phy_config_interrupt(phydev,PHY_INTERRUPT_DISABLED); /*Clearanypendinginterrupts*/ phy_clear_interrupt(phydev); } phydev->state=PHY_HALTED; out_unlock: mutex_unlock(&phydev->lock); /*Cannotcallflush_scheduled_work()hereasdesiredbecause *ofrtnl_lock(),butPHY_HALTEDshallguaranteephy_change() *willnotreenableinterrupts. */ }
phy_stop()将phydev->state设置为PHY_HALTED,将网卡关闭。
__dev_open
__dev_open为设备启用核心函数,该函数打开eth0,设置启用标记,并且设置接收模式,排队规则等。
staticint__dev_open(structnet_device*dev) { conststructnet_device_ops*ops=dev->netdev_ops; intret; ASSERT_RTNL(); /*设备不可用*/ if(!netif_device_present(dev)) return-ENODEV; /*Blocknetpollfromtryingtodoanyrxpathservicing. *Ifwedon'tdothisthereisachancendo_poll_controller *orndo_pollmayberunningwhileweopenthedevice */ /*禁用netpoll*/ netpoll_poll_disable(dev); /*设备打开前通知*/ ret=call_netdevice_notifiers(NETDEV_PRE_UP,dev); ret=notifier_to_errno(ret); if(ret) returnret; /*设置设备打开标记,设备将设置IFF_UP标志位*/ set_bit(__LINK_STATE_START,&dev->state); /*校验地址*/ if(ops->ndo_validate_addr) ret=ops->ndo_validate_addr(dev); /*执行打开*/ if(!ret&&ops->ndo_open) ret=ops->ndo_open(dev); /*启用netpoll*/ netpoll_poll_enable(dev); /*失败,清除打开标记*/ if(ret) clear_bit(__LINK_STATE_START,&dev->state); /*设备打开操作*/ else{ /*设置打开标记*/ dev->flags|=IFF_UP; /*设置接收模式*/ dev_set_rx_mode(dev); /*初始化排队规则*/ dev_activate(dev); /*加入设备数据到熵池*/ add_device_randomness(dev->dev_addr,dev->addr_len); } returnret; }
hns_nic_net_open()
我们以海思的网卡驱动为例,分析下ndo_open()函数的实现。代码位于kernel/drivers/net/ethernet/hisilicon/hns/hns_enet.c。
staticinthns_nic_net_open(structnet_device*ndev) { structhns_nic_priv*priv=netdev_priv(ndev); structhnae_handle*h=priv->ae_handle; intret; if(test_bit(NIC_STATE_TESTING,&priv->state)) return-EBUSY; priv->link=0; netif_carrier_off(ndev); /*设置txqueue的个数*/ ret=netif_set_real_num_tx_queues(ndev,h->q_num); if(ret< 0) { netdev_err(ndev, "netif_set_real_num_tx_queues fail, ret=%d! ", ret); return ret; } /*设置rx queue的个数*/ ret = netif_set_real_num_rx_queues(ndev, h->q_num); if(ret< 0) { netdev_err(ndev, "netif_set_real_num_rx_queues fail, ret=%d! ", ret); return ret; } /*启动网卡*/ ret = hns_nic_net_up(ndev); if (ret) { netdev_err(ndev, "hns net up fail, ret=%d! ", ret); return ret; } return 0; }
hns_nic_net_up()
staticinthns_nic_net_up(structnet_device*ndev) { structhns_nic_priv*priv=netdev_priv(ndev); structhnae_handle*h=priv->ae_handle; inti,j,k; intret; /*初始化中断,并设置中断函数为hns_irq_handle,每个rx和txqueue都对应一个中断*/ ret=hns_nic_init_irq(priv); if(ret!=0){ netdev_err(ndev,"hnsinitirqfailed!ret=%d ",ret); returnret; } for(i=0;i< h->q_num*2;i++){ /*使能中断,使能napi*/ ret=hns_nic_ring_open(ndev,i); if(ret) gotoout_has_some_queues; } for(k=0;k< h->q_num;k++) h->dev->ops->toggle_queue_status(h->qs[k],1); /*设置mac地址*/ ret=h->dev->ops->set_mac_addr(h,ndev->dev_addr); if(ret) gotoout_set_mac_addr_err; /*hns的start函数为null*/ ret=h->dev->ops->start?h->dev->ops->start(h):0; if(ret) gotoout_start_err; if(priv->phy) /*启动phy*/ phy_start(priv->phy); clear_bit(NIC_STATE_DOWN,&priv->state); /*修改time每一秒到期一次*/ (void)mod_timer(&priv->service_timer,jiffies+SERVICE_TIMER_HZ); return0; out_start_err: netif_stop_queue(ndev); out_set_mac_addr_err: for(k=0;k< h->q_num;k++) h->dev->ops->toggle_queue_status(h->qs[k],0); out_has_some_queues: for(j=i-1;j>=0;j--) hns_nic_ring_close(ndev,j); set_bit(NIC_STATE_DOWN,&priv->state); returnret; }
phy_start()
最后会调用到phy_start()启动网卡。
voidphy_start(structphy_device*phydev) { booldo_resume=false; interr=0; mutex_lock(&phydev->lock); switch(phydev->state){ casePHY_STARTING: phydev->state=PHY_PENDING; break; casePHY_READY: phydev->state=PHY_UP; break; casePHY_HALTED: /*makesureinterruptsarere-enabledforthePHY*/ err=phy_enable_interrupts(phydev); if(err< 0) break; phydev->state=PHY_RESUMING; do_resume=true; break; default: break; } mutex_unlock(&phydev->lock); /*ifphywassuspended,bringthephysicallinkupagain*/ if(do_resume) phy_resume(phydev); }
审核编辑:刘清
-
网卡驱动
+关注
关注
0文章
35浏览量
17694
原文标题:【网络驱动】ifconfig up 后内核网络驱动做了什么?
文章出处:【微信号:嵌入式与Linux那些事,微信公众号:嵌入式与Linux那些事】欢迎添加关注!文章转载请注明出处。
发布评论请先 登录
相关推荐
评论