下一個更為重要的函數是scsi_dispatch_cmd,來自drivers/scsi/scsi.c:
459 /*
460 * Function: scsi_dispatch_command
461 *
462 * Purpose: Dispatch a command to the low-level driver.
463 *
464 * Arguments: cmd - command block we are dispatching.
465 *
466 * Notes:
467 */
468 int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
469 {
470 struct Scsi_Host *host = cmd->device->host;
471 unsigned long flags = 0;
472 unsigned long timeout;
473 int rtn = 0;
474
475 /* check if the device is still usable */
476 if (unlikely(cmd->device->sdev_state == SDEV_DEL)) {
477 /* in SDEV_DEL we error all commands. DID_NO_CONNECT
478 * returns an immediate error upwards, and signals
479 * that the device is no longer present */
480 cmd->result = DID_NO_CONNECT << 16;
481 atomic_inc(&cmd->device->iorequest_cnt);
482 __scsi_done(cmd);
483 /* return 0 (because the command has been processed) */
484 goto out;
485 }
486
487 /* Check to see if the scsi lld put this device into state SDEV_BLOCK. */
488 if (unlikely(cmd->device->sdev_state == SDEV_BLOCK)) {
489 /*
490 * in SDEV_BLOCK, the command is just put back on the device
491 * queue. The suspend state has already blocked the queue so
492 * future requests should not occur until the device
493 * transitions out of the suspend state.
494 */
495 scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
496
497 SCSI_LOG_MLQUEUE(3, printk("queuecommand : device blocked \n"));
498
499 /*
500 * NOTE: rtn is still zero here because we don't need the
501 * queue to be plugged on return (it's already stopped)
502 */
503 goto out;
504 }
505
506 /*
507 * If SCSI-2 or lower, store the LUN value in cmnd.
508 */
509 if (cmd->device->scsi_level <= SCSI_2 &&
510 cmd->device->scsi_level != SCSI_UNKNOWN) {
511 cmd->cmnd[1] = (cmd->cmnd[1] & 0x1f) |
512 (cmd->device->lun << 5 & 0xe0);
513 }
514
515 /*
516 * We will wait MIN_RESET_DELAY clock ticks after the last reset so
517 * we can avoid the drive not being ready.
518 */
519 timeout = host->last_reset + MIN_RESET_DELAY;
520
521 if (host->resetting && time_before(jiffies, timeout)) {
522 int ticks_remaining = timeout - jiffies;
523 /*
524 * NOTE: This may be executed from within an interrupt
525 * handler! This is bad, but for now, it'll do. The irq
526 * level of the interrupt handler has been masked out by the
527 * platform dependent interrupt handling code already, so the
528 * sti() here will not cause another call to the SCSI host's
529 * interrupt handler (assuming there is one irq-level per
530 * host).
531 */
532 while (--ticks_remaining >= 0)
533 mdelay(1 + 999 / HZ);
534 host->resetting = 0;
535 }
536
537 /*
538 * AK: unlikely race here: for some reason the timer could
539 * expire before the serial number is set up below.
540 */
541 scsi_add_timer(cmd, cmd->timeout_per_command, scsi_times_out);
542
543 scsi_log_send(cmd);
544
545 /*
546 * We will use a queued command if possible, otherwise we will
547 * emulate the queuing and calling of completion function ourselves.
548 */
549 atomic_inc(&cmd->device->iorequest_cnt);
550
551 /*
552 * Before we queue this command, check if the command
553 * length exceeds what the host adapter can handle.
554 */
555 if (CDB_SIZE(cmd) > cmd->device->host->max_cmd_len) {
556 SCSI_LOG_MLQUEUE(3,
557 printk("queuecommand : command too long.\n"));
558 cmd->result = (DID_ABORT << 16);
559
560 scsi_done(cmd);
561 goto out;
562 }
563
564 spin_lock_irqsave(host->host_lock, flags);
565 scsi_cmd_get_serial(host, cmd);
566
567 if (unlikely(host->shost_state == SHOST_DEL)) {
568 cmd->result = (DID_NO_CONNECT << 16);
569 scsi_done(cmd);
570 } else {
571 rtn = host->hostt->queuecommand(cmd, scsi_done);
572 }
573 spin_unlock_irqrestore(host->host_lock, flags);
574 if (rtn) {
575 if (scsi_delete_timer(cmd)) {
576 atomic_inc(&cmd->device->iodone_cnt);
577 scsi_queue_insert(cmd,
578 (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ?
579 rtn : SCSI_MLQUEUE_HOST_BUSY);
580 }
581 SCSI_LOG_MLQUEUE(3,
582 printk("queuecommand : request rejected\n"));
583 }
584
585 out:
586 SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n"));
587 return rtn;
588 }
一路走來的兄弟一定會一眼就看出這里我們最期待的一行代碼就是571那個queuecommand()的調用.因為這之后我們就知道該發生什么了.比如對於U盤驅動來說,命令就從這里接過去開始執行.而對於實際的scsi控制器,其對應的驅動中的queuecommand也會被調用,剩下的事情我們就不用操心了.正常情況下queuecommand返回0.於是緊接着scsi_dispatch_cmd也返回0.這樣就算是執行了一條scsi命令了.
而scsi_request_fn()是否結束還得看while循環的條件是否滿足,而這就得看blk_queue_plugged()的臉色了.那么我們從字面上來分析,什么叫queue plugged?我那盜版金山詞霸告訴我plugged就是塞緊的意思,你說隊列塞緊的是什么意思?比如說,北四環上上下班高峰期,許許多多的車輛排成一隊又一隊,但是可能半天都前進不了,這就叫塞緊,或者說堵車,也叫塞車.為此咱們使用一個flag來標志堵車與否,來自include/linux/blkdev.h:
523 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
改變這個flag的函數有兩個,一個是設置,一個是取消.
負責設置的是blk_plug_device.
1542 /*
1543 * "plug" the device if there are no outstanding requests: this will
1544 * force the transfer to start only after we have put all the requests
1545 * on the list.
1546 *
1547 * This is called with interrupts off and no requests on the queue and
1548 * with the queue lock held.
1549 */
1550 void blk_plug_device(request_queue_t *q)
1551 {
1552 WARN_ON(!irqs_disabled());
1553
1554 /*
1555 * don't plug a stopped queue, it must be paired with blk_start_queue()
1556 * which will restart the queueing
1557 */
1558 if (blk_queue_stopped(q))
1559 return;
1560
1561 if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
1562 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
1563 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
1564 }
1565 }
負責取消的是blk_remove_plug().
1569 /*
1570 * remove the queue from the plugged list, if present. called with
1571 * queue lock held and interrupts disabled.
1572 */
1573 int blk_remove_plug(request_queue_t *q)
1574 {
1575 WARN_ON(!irqs_disabled());
1576
1577 if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
1578 return 0;
1579
1580 del_timer(&q->unplug_timer);
1581 return 1;
1582 }
而調用前者的地方不少,比如我們見到的__elv_add_request,其第四個參數int plug就可以控制是否調用blk_plug_device(),而當我們在blk_execute_rq_nowait()中調用__elv_add_request()的時候傳遞的plug就是1.
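另一方面,調用blk_remove_plug的地方也有多處,__generic_unplug_device()就是其中之一.而blk_execute_rq_nowait()在調用__elv_add_request()之后緊接着就調用了__generic_unplug_device(),剛設上的flag隨即又被清除了.所以在咱們這個上下文里,這個flag實際上並沒有處於設置狀態,因此scsi_request_fn()就會被執行.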
那么編寫這兩個函數究竟是為了什么呢?這年頭,有人做賊,我可以理解是為了劫富濟貧,有人殺人,我可以理解是為了伸張正義,甚至有女人紅杏出牆,我還可以理解是為了繁榮經濟.然而,很長一段時間我都沒辦法理解有人編寫這兩個函數是為了什么?
后來我想,不妨這樣理解,假設你經常開車經過長安街,你會發現經常有戒嚴的現象發生,比如某位領導人要出行,比如某位領導人要來訪,而你可以把blk_plug_device()想象成戒嚴,把blk_remove_plug想象成開放.車流要想行進,前提條件是沒有戒嚴,換言之,沒有設卡,而QUEUE_FLAG_PLUGGED這個flag就相當於“卡”,設了它隊列就不能前進了,沒有設才有可能前進.之所以需要設卡,是因為確實有這個需求,有時候確實不想讓隊列前進.
那么這里我們還看到兩個函數被調用了,mod_timer和del_timer,這是干嘛使的?還記得kblockd么?最早咱們創建了那個工作隊列kblockd_workqueue,現在是它該出場的時間了.讓我們把鏡頭拉回到函數blk_init_queue_node().這個函數我們曾經看過,所以這里只貼出其中跟我們這里密切相關的幾行:
1922 q->request_fn = rfn;
1923 q->prep_rq_fn = NULL;
1924 q->unplug_fn = generic_unplug_device;
1925 q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);
1926 q->queue_lock = lock;
1927
1928 blk_queue_segment_boundary(q, 0xffffffff);
1929
1930 blk_queue_make_request(q, __make_request);
首先q->unplug_fn被賦上了generic_unplug_device.這一點很重要,稍后會用到.
然后來看blk_queue_make_request().這個函數當時咱們並沒有講過.來自block/ll_rw_blk.c:
180 /**
181 * blk_queue_make_request - define an alternate make_request function for a device
182 * @q: the request queue for the device to be affected
183 * @mfn: the alternate make_request function
184 *
185 * Description:
186 * The normal way for &struct bios to be passed to a device
187 * driver is for them to be collected into requests on a request
188 * queue, and then to allow the device driver to select requests
189 * off that queue when it is ready. This works well for many block
190 * devices. However some block devices (typically virtual devices
191 * such as md or lvm) do not benefit from the processing on the
192 * request queue, and are served best by having the requests passed
193 * directly to them. This can be achieved by providing a function
194 * to blk_queue_make_request().
195 *
196 * Caveat:
197 * The driver that does this *must* be able to deal appropriately
198 * with buffers in "highmemory". This can be accomplished by either calling
199 * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling
200 * blk_queue_bounce() to create a buffer in normal memory.
201 **/
202 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
203 {
204 /*
205 * set defaults
206 */
207 q->nr_requests = BLKDEV_MAX_RQ;
208 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
209 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
210 q->make_request_fn = mfn;
211 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
212 q->backing_dev_info.state = 0;
213 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
214 blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
215 blk_queue_hardsect_size(q, 512);
216 blk_queue_dma_alignment(q, 511);
217 blk_queue_congestion_threshold(q);
218 q->nr_batching = BLK_BATCH_REQ;
219
220 q->unplug_thresh = 4; /* hmm */
221 q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */
222 if (q->unplug_delay == 0)
223 q->unplug_delay = 1;
224
225 INIT_WORK(&q->unplug_work, blk_unplug_work);
226
227 q->unplug_timer.function = blk_unplug_timeout;
228 q->unplug_timer.data = (unsigned long)q;
229
230 /*
231 * by default assume old behaviour and bounce for any highmem page
232 */
233 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
234 }
這里重點關注幾個“unplug”為名字的成員.尤其是INIT_WORK,它使得一旦unplug_work這項工作被執行,blk_unplug_work這個函數就會被執行.而unplug_timer這么一賦值,我們就知道,一旦設了鬧鐘,一旦鬧鐘時間到了,blk_unplug_timeout這個函數就會被執行.並且因為這里設置了unplug_delay為3ms,使得鬧鐘的timeout就是3ms,一旦激活鬧鐘,3ms之后blk_unplug_timeout就會被執行.這個函數來自block/ll_rw_blk.c:
1646 static void blk_unplug_timeout(unsigned long data)
1647 {
1648 request_queue_t *q = (request_queue_t *)data;
1649
1650 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
1651 q->rq.count[READ] + q->rq.count[WRITE]);
1652
1653 kblockd_schedule_work(&q->unplug_work);
1654 }
可以看到,其實就是執行kblockd_schedule_work,換言之,真正被調用的函數就是blk_unplug_work().
1636 static void blk_unplug_work(struct work_struct *work)
1637 {
1638 request_queue_t *q = container_of(work, request_queue_t, unplug_work);
1639
1640 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
1641 q->rq.count[READ] + q->rq.count[WRITE]);
1642
1643 q->unplug_fn(q);
1644 }
而剛才我們說了,unplug_fn被賦上了generic_unplug_device.所以真正要執行的是generic_unplug_device.而這個函數又長成什么樣呢?
1601 /**
1602 * generic_unplug_device - fire a request queue
1603 * @q: The &request_queue_t in question
1604 *
1605 * Description:
1606 * Linux uses plugging to build bigger requests queues before letting
1607 * the device have at them. If a queue is plugged, the I/O scheduler
1608 * is still adding and merging requests on the queue. Once the queue
1609 * gets unplugged, the request_fn defined for the queue is invoked and
1610 * transfers started.
1611 **/
1612 void generic_unplug_device(request_queue_t *q)
1613 {
1614 spin_lock_irq(q->queue_lock);
1615 __generic_unplug_device(q);
1616 spin_unlock_irq(q->queue_lock);
1617 }
哦,扭扭捏捏大半天,其實就是調用__generic_unplug_device.而回過頭去看這個函數,我們知道,它也無非就是調用了兩個函數,blk_remove_plug和request_fn.這下子我們基本上就明白了.總結一下就是:
1. blk_plug_device()負責戒嚴.
2. blk_remove_plug()負責解禁.
3. 但是戒嚴這東西吧,也是有時間限制的,畢竟長安街就算有重大活動也是短時間的,一年中畢竟大多數時間還是得保證道路暢通.所以在戒嚴的時候,設了一個定時器unplug_timer(即mod_timer),一旦時間到了就自動執行blk_remove_plug去解禁.
4. 而在解禁的時候就不要忘記把這個定時器給關掉(即del_timer).
5. 解禁之后調用request_fn()開始處理隊列中的下一個請求,或者說車流開始恢復前行.
本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系我们删除。