Linux那些事兒之我是Block層(9)scsi命令的前世今生(三)


下一個更為重要的函數是scsi_dispatch_cmd,來自drivers/scsi/scsi.c:

    459 /*

    460  * Function:    scsi_dispatch_command

    461  *

    462  * Purpose:     Dispatch a command to the low-level driver.

    463  *

    464  * Arguments:   cmd - command block we are dispatching.

    465  *

    466  * Notes:

    467  */

    468 int scsi_dispatch_cmd(struct scsi_cmnd *cmd)

    469 {

    470         struct Scsi_Host *host = cmd->device->host;

    471         unsigned long flags = 0;

    472         unsigned long timeout;

    473         int rtn = 0;

    474

    475         /* check if the device is still usable */

    476         if (unlikely(cmd->device->sdev_state == SDEV_DEL)) {

    477                 /* in SDEV_DEL we error all commands. DID_NO_CONNECT

    478                  * returns an immediate error upwards, and signals

    479                  * that the device is no longer present */

    480                 cmd->result = DID_NO_CONNECT << 16;

    481                 atomic_inc(&cmd->device->iorequest_cnt);

    482                 __scsi_done(cmd);

    483                 /* return 0 (because the command has been processed) */

    484                 goto out;

    485         }

    486

    487         /* Check to see if the scsi lld put this device into state SDEV_BLOCK. */

    488         if (unlikely(cmd->device->sdev_state == SDEV_BLOCK)) {

    489                 /*

    490                  * in SDEV_BLOCK, the command is just put back on the device

    491                  * queue.  The suspend state has already blocked the queue so

    492                  * future requests should not occur until the device

    493                  * transitions out of the suspend state.

    494                  */

    495                 scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);

    496

    497                 SCSI_LOG_MLQUEUE(3, printk("queuecommand : device blocked \n"));

    498

    499                 /*

    500                  * NOTE: rtn is still zero here because we don't need the

    501                  * queue to be plugged on return (it's already stopped)

    502                  */

    503                 goto out;

    504         }

    505

    506         /*

    507          * If SCSI-2 or lower, store the LUN value in cmnd.

    508          */

    509         if (cmd->device->scsi_level <= SCSI_2 &&

    510             cmd->device->scsi_level != SCSI_UNKNOWN) {

    511                 cmd->cmnd[1] = (cmd->cmnd[1] & 0x1f) |

    512                                (cmd->device->lun << 5 & 0xe0);

    513         }

    514

    515         /*

    516          * We will wait MIN_RESET_DELAY clock ticks after the last reset so

    517          * we can avoid the drive not being ready.

    518          */

    519         timeout = host->last_reset + MIN_RESET_DELAY;

    520

    521         if (host->resetting && time_before(jiffies, timeout)) {

    522                 int ticks_remaining = timeout - jiffies;

    523                 /*

    524                  * NOTE: This may be executed from within an interrupt

    525                  * handler!  This is bad, but for now, it'll do.  The irq

    526                  * level of the interrupt handler has been masked out by the

    527                  * platform dependent interrupt handling code already, so the

    528                  * sti() here will not cause another call to the SCSI host's

    529                  * interrupt handler (assuming there is one irq-level per

    530                  * host).

    531                  */

    532                 while (--ticks_remaining >= 0)

    533                         mdelay(1 + 999 / HZ);

    534                 host->resetting = 0;

    535         }

    536

    537         /*

    538          * AK: unlikely race here: for some reason the timer could

    539          * expire before the serial number is set up below.

    540          */

    541         scsi_add_timer(cmd, cmd->timeout_per_command, scsi_times_out);

    542

    543         scsi_log_send(cmd);

    544

    545         /*

    546          * We will use a queued command if possible, otherwise we will

    547          * emulate the queuing and calling of completion function ourselves.

    548          */

    549         atomic_inc(&cmd->device->iorequest_cnt);

    550

    551         /*

    552          * Before we queue this command, check if the command

    553          * length exceeds what the host adapter can handle.

    554          */

    555         if (CDB_SIZE(cmd) > cmd->device->host->max_cmd_len) {

    556                 SCSI_LOG_MLQUEUE(3,

    557                                 printk("queuecommand : command too long.\n"));

    558                 cmd->result = (DID_ABORT << 16);

    559

    560                 scsi_done(cmd);

    561                 goto out;

    562         }

    563

    564         spin_lock_irqsave(host->host_lock, flags);

    565         scsi_cmd_get_serial(host, cmd);

    566

    567         if (unlikely(host->shost_state == SHOST_DEL)) {

    568                 cmd->result = (DID_NO_CONNECT << 16);

    569                 scsi_done(cmd);

    570         } else {

    571                 rtn = host->hostt->queuecommand(cmd, scsi_done);

    572         }

    573         spin_unlock_irqrestore(host->host_lock, flags);

    574         if (rtn) {

    575                 if (scsi_delete_timer(cmd)) {

    576                         atomic_inc(&cmd->device->iodone_cnt);

    577                         scsi_queue_insert(cmd,

    578                                           (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ?

    579                                           rtn : SCSI_MLQUEUE_HOST_BUSY);

    580                 }

    581                 SCSI_LOG_MLQUEUE(3,

    582                     printk("queuecommand : request rejected\n"));

    583         }

    584

    585  out:

    586         SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n"));

    587         return rtn;

    588 }

一路走來的兄弟一定會一眼就看出這里我們最期待的一行代碼就是571那個queuecommand()的調用.因為這之后我們就知道該發生什么了.比如對於U盤驅動來說,命令就從這里接過去開始執行.而對於實際的scsi控制器,其對應的驅動中的queuecommand也會被調用,剩下的事情我們就不用操心了.正常情況下queuecommand返回0.於是緊接着scsi_dispatch_cmd也返回0.這樣就算是執行了一條scsi命令了.

scsi_request_fn()是否結束還得看while循環的條件是否滿足,而這就得看blk_queue_plugged()的臉色了.那么我們從字面上來分析,什么叫queue plugged?我那盜版金山詞霸告訴我plugged就是塞緊的意思,你說隊列塞緊的是什么意思?比如說,北四環上上下班高峰期,許許多多的車輛排成一隊又一隊,但是可能半天都前進不了,這就叫塞緊,或者說堵車,也叫塞車.為此咱們使用一個flag來標志堵車與否,來自include/linux/blkdev.h:

    523 #define blk_queue_plugged(q)    test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)

改變這個flag的函數有兩個,一個是設置,一個是取消.

負責設置的是blk_plug_device.

   1542 /*

   1543  * "plug" the device if there are no outstanding requests: this will

   1544  * force the transfer to start only after we have put all the requests

   1545  * on the list.

   1546  *

   1547  * This is called with interrupts off and no requests on the queue and

   1548  * with the queue lock held.

   1549  */

   1550 void blk_plug_device(request_queue_t *q)

   1551 {

   1552         WARN_ON(!irqs_disabled());

   1553

   1554         /*

   1555          * don't plug a stopped queue, it must be paired with blk_start_queue()

   1556          * which will restart the queueing

   1557          */

   1558         if (blk_queue_stopped(q))

   1559                 return;

   1560

   1561         if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {

   1562                 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);

   1563                 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);

   1564         }

   1565 }

負責取消的是blk_remove_plug().

   1569 /*

   1570  * remove the queue from the plugged list, if present. called with

   1571  * queue lock held and interrupts disabled.

   1572  */

   1573 int blk_remove_plug(request_queue_t *q)

   1574 {

   1575         WARN_ON(!irqs_disabled());

   1576

   1577         if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))

   1578                 return 0;

   1579

   1580         del_timer(&q->unplug_timer);

   1581         return 1;

   1582 }

而調用前者的地方不少,比如我們見到的__elv_add_request,其第四個參數int plug就可以控制是否調用blk_plug_device(),而當我們在blk_execute_rq_nowait()中調用__elv_add_request()的時候傳遞的plug就是1.

另一方面,調用blk_remove_plug的地方也有多處.其中__generic_unplug_device()就是之一.所以在咱們這個上下文里,實際上並沒有設置這個flag,因此scsi_request_fn()就會被執行.

那么編寫這兩個函數究竟是為了什么呢?這年頭,有人做賊,我可以理解是為了劫富濟貧,有人殺人,我可以理解是為了伸張正義,甚至有女人紅杏出牆,我還可以理解是為了繁榮經濟.然而,很長一段時間我都沒辦法理解有人編寫這兩個函數是為了什么?

后來我想,不妨這樣理解,假設你經常開車經過長安街,你會發現經常有戒嚴的現象發生,比如某位領導人要出行,比如某位領導人要來訪,而你可以把blk_plug_device()想象成戒嚴,把blk_remove_plug()想象成開放.車流要想行進,前提條件是沒有戒嚴,換言之,沒有設卡,QUEUE_FLAG_PLUGGED這個flag就相當於“卡”,設了它隊列就不能前進了,沒有設才有可能前進.之所以需要設卡,是因為確實有這個需求,有時候確實不想讓隊列前進.

那么這里我們還看到兩個函數被調用了,mod_timer和del_timer,這是干嘛使的?還記得kblockd嗎?最早咱們創建了那個工作隊列kblockd_workqueue,現在是它該出場的時間了.讓我們把鏡頭拉回到函數blk_init_queue_node().這個函數我們曾經看過,所以這里只貼出其中跟我們這里密切相關的幾行:

   1922         q->request_fn           = rfn;

   1923         q->prep_rq_fn           = NULL;

   1924         q->unplug_fn            = generic_unplug_device;

   1925         q->queue_flags          = (1 << QUEUE_FLAG_CLUSTER);

   1926         q->queue_lock           = lock;

   1927

   1928         blk_queue_segment_boundary(q, 0xffffffff);

   1929

   1930         blk_queue_make_request(q, __make_request);

首先q->unplug_fn被賦上了generic_unplug_device.這一點很重要,稍后會用到.

然后來看blk_queue_make_request().這個函數當時咱們並沒有講過.來自block/ll_rw_blk.c:

    180 /**

    181  * blk_queue_make_request - define an alternate make_request function for a device

    182  * @q:  the request queue for the device to be affected

    183  * @mfn: the alternate make_request function

    184  *

    185  * Description:

    186  *    The normal way for &struct bios to be passed to a device

    187  *    driver is for them to be collected into requests on a request

    188  *    queue, and then to allow the device driver to select requests

    189  *    off that queue when it is ready.  This works well for many block

    190  *    devices. However some block devices (typically virtual devices

    191  *    such as md or lvm) do not benefit from the processing on the

    192  *    request queue, and are served best by having the requests passed

    193  *    directly to them.  This can be achieved by providing a function

    194  *    to blk_queue_make_request().

    195  *

    196  * Caveat:

    197  *    The driver that does this *must* be able to deal appropriately

    198  *    with buffers in "highmemory". This can be accomplished by either calling

    199  *    __bio_kmap_atomic() to get a temporary kernel mapping, or by calling

    200  *    blk_queue_bounce() to create a buffer in normal memory.

    201  **/

    202 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)

    203 {

    204         /*

    205          * set defaults

    206          */

    207         q->nr_requests = BLKDEV_MAX_RQ;

    208         blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);

    209         blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);

    210         q->make_request_fn = mfn;

    211         q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;

    212         q->backing_dev_info.state = 0;

    213         q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;

    214         blk_queue_max_sectors(q, SAFE_MAX_SECTORS);

    215         blk_queue_hardsect_size(q, 512);

    216         blk_queue_dma_alignment(q, 511);

    217         blk_queue_congestion_threshold(q);

    218         q->nr_batching = BLK_BATCH_REQ;

    219

    220         q->unplug_thresh = 4;           /* hmm */

    221         q->unplug_delay = (3 * HZ) / 1000;      /* 3 milliseconds */

    222         if (q->unplug_delay == 0)

    223                 q->unplug_delay = 1;

    224

    225         INIT_WORK(&q->unplug_work, blk_unplug_work);

    226

    227         q->unplug_timer.function = blk_unplug_timeout;

    228         q->unplug_timer.data = (unsigned long)q;

    229

    230         /*

    231          * by default assume old behaviour and bounce for any highmem page

    232          */

    233         blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);

    234 }

這里重點關注幾個以“unplug”為名字的成員.尤其是INIT_WORK,它使得一旦unplug_work這項工作被執行,blk_unplug_work這個函數就會被執行.而unplug_timer這么一賦值,我們就知道,一旦設了鬧鍾,一旦鬧鍾時間到了,blk_unplug_timeout這個函數就會被執行.並且因為這里設置了unplug_delay為3ms,使得鬧鍾的timeout就是3ms,一旦激活鬧鍾,3ms之后blk_unplug_timeout就會被執行.這個函數來自block/ll_rw_blk.c:

   1646 static void blk_unplug_timeout(unsigned long data)

   1647 {

   1648         request_queue_t *q = (request_queue_t *)data;

   1649

   1650         blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,

   1651                                 q->rq.count[READ] + q->rq.count[WRITE]);

   1652

   1653         kblockd_schedule_work(&q->unplug_work);

   1654 }

可以看到,其實就是執行kblockd_schedule_work,把unplug_work這項工作交給kblockd工作隊列去執行.而前面INIT_WORK已經把unplug_work和blk_unplug_work綁在了一起,換言之,真正被調用的函數就是blk_unplug_work().

   1636 static void blk_unplug_work(struct work_struct *work)

   1637 {

   1638         request_queue_t *q = container_of(work, request_queue_t, unplug_work);

   1639

   1640         blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,

   1641                                 q->rq.count[READ] + q->rq.count[WRITE]);

   1642

   1643         q->unplug_fn(q);

   1644 }

而剛才我們說了,unplug_fn被賦上了generic_unplug_device.所以真正要執行的是generic_unplug_device.而這個函數又長成什么樣呢?

   1601 /**

   1602  * generic_unplug_device - fire a request queue

   1603  * @q:    The &request_queue_t in question

   1604  *

   1605  * Description:

   1606  *   Linux uses plugging to build bigger requests queues before letting

   1607  *   the device have at them. If a queue is plugged, the I/O scheduler

   1608  *   is still adding and merging requests on the queue. Once the queue

   1609  *   gets unplugged, the request_fn defined for the queue is invoked and

   1610  *   transfers started.

   1611  **/

   1612 void generic_unplug_device(request_queue_t *q)

   1613 {

   1614         spin_lock_irq(q->queue_lock);

   1615         __generic_unplug_device(q);

   1616         spin_unlock_irq(q->queue_lock);

   1617 }

哦,扭扭捏捏大半天,其實就是調用__generic_unplug_device.而回過頭去看這個函數,我們知道,它也無非就是調用了兩個函數,blk_remove_plug和request_fn.這下子我們基本上就明白了.總結一下就是:

1.        blk_plug_device()負責戒嚴.

2.        blk_remove_plug()負責解禁.

3.        但是戒嚴這東西吧,也是有時間限制的,畢竟長安街就算有重大活動也是短時間的,一年中畢竟大多數時間還是得保證道路暢通.所以在戒嚴的時候,設了一個定時器,unplug_timer, (mod_timer),一旦時間到了就自動執行blk_remove_plug去解禁.

4.        而在解禁的時候就不要忘記把這個定時器給關掉.(del_timer)

5.        解禁之后調用request_fn()開始處理隊列中的下一個請求,或者說車流開始恢復前行.

Ok, 這樣我們就算是明白這兩個戒嚴與解禁的函數了. 最后, 題外話, 關於 unplug 和 plug, 我覺得更貼切的單詞是 activate 和 deactivate, 或者說激活與凍結, 或者簡單的說, 開與關.
 

注意!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系我们删除。



 
粤ICP备14056181号  © 2014-2021 ITdaan.com