字符设备驱动程序

Linux Android

Posted by LXG on March 21, 2022

字符设备

字符设备通过字符以流的方式向用户程序传递数据,就像串行端口那样。字符设备驱动通过/dev目录下的特殊文件公开设备的属性和功能,通过这个文件可以在设备和用户应用程序之间交换数据,也可以通过它来控制实际的物理设备。

kernel/include/linux/cdev.h


#ifndef _LINUX_CDEV_H
#define _LINUX_CDEV_H

#include <linux/kobject.h>
#include <linux/kdev_t.h>
#include <linux/list.h>

struct file_operations;
struct inode;
struct module;

struct cdev {
	struct kobject kobj;
	struct module *owner;
	const struct file_operations *ops;
	struct list_head list;
	dev_t dev;                       // 描述主设备和次设备号
	unsigned int count;
};

void cdev_init(struct cdev *, const struct file_operations *);

struct cdev *cdev_alloc(void);

void cdev_put(struct cdev *p);

int cdev_add(struct cdev *, dev_t, unsigned);

void cdev_del(struct cdev *);

void cd_forget(struct inode *);

#endif

设备文件操作

每一列的第一个字符代表设备类型

  • c: 字符设备文件
  • b: 块设备文件
  • l: 符号链接
  • d: 目录
  • s: 套接字
  • p: 命名管道

# ls -al dev/                                                                                                                                                                                                    
total 8
drwxr-xr-x 18 root      root             1680 2022-03-21 15:19 .
drwxrwxrwt 18 root      root             1140 2022-03-21 15:19 ..
----------  1 root      root                0 2022-03-21 15:19 .coldboot_done
drwxr-xr-x  2 root      root              820 2022-03-21 15:19 __properties__
crw-rw-rw-  1 root      root          10,  58 2022-03-21 15:19 ashmem
crw-rw-rw-  1 root      root          10,  56 2022-03-21 15:19 binder
drwxr-xr-x  4 root      root              960 2022-03-21 15:19 block
drwxr-xr-x  3 root      root               60 2022-03-21 15:19 bus
crw-rw-r--  1 media     camera        10,  59 2022-03-21 15:19 camsys_marvin
crw-------  1 root      root         250,   0 2022-03-21 15:19 cec0
crw-------  1 root      root           5,   1 2022-03-21 15:19 console
crw-------  1 root      root          10,  52 2022-03-21 15:19 cpu_dma_latency
dr-xr-xr-x  3 system    system              0 2022-03-21 15:19 cpuctl
dr-xr-xr-x  6 system    system              0 2022-03-21 15:19 cpuset
crw-------  1 root      root          10, 236 2022-03-21 15:19 device-mapper
drwxr-xr-x  2 root      root              100 2022-03-21 15:19 dri
lrwxrwxrwx  1 root      root               13 2022-03-21 15:19 fd -> /proc/self/fd
drwxrwx---  2 root      system             40 2022-03-21 15:21 fscklogs
crw-rw-rw-  1 root      root           1,   7 2022-03-21 15:19 full
crw-------  1 root      root          10, 229 2022-03-21 15:19 fuse
drwxr-xr-x  2 root      root               60 2022-03-21 15:19 graphics
crw-------  1 root      root          10,  46 2022-03-21 15:19 hdmi_hdcp1x
crw-rw-rw-  1 media     media        244,   0 2022-03-21 15:19 hevc-service
crw-------  1 root      root         239,   0 2022-03-21 15:19 hidraw0
crw-------  1 root      root          10,  55 2022-03-21 15:19 hwbinder
crw-------  1 root      root          89,   0 2022-03-21 15:19 i2c-0
crw-rw----  1 system    system        89,   1 2022-03-21 15:19 i2c-1
crw-------  1 root      root          89,   2 2022-03-21 15:19 i2c-2
crw-------  1 root      root          89,   3 2022-03-21 15:19 i2c-3
crw-------  1 root      root          89,   4 2022-03-21 15:19 i2c-4
crw-------  1 root      root          89,   6 2022-03-21 15:19 i2c-6
crw-rw-rw-  1 media     media         10,  61 2022-03-21 15:19 iep
crw-rw----  1 system    system       247,   0 2022-03-21 15:19 iio:device0
drwxr-xr-x  2 root      root               80 2022-03-21 15:19 input
lrwxrwxrwx  1 root      root               10 2022-03-21 15:19 jlb -> /dev/ttyS3
crw-------  1 root      root          10,  60 2022-03-21 15:19 keychord
crw-------  1 root      root           1,  11 2022-03-21 15:19 kmsg
crw-------  1 root      root          10, 237 2022-03-21 15:19 loop-control
crw-rw-rw-  1 system    system        10,  47 2022-03-21 15:19 mali0
crw-------  1 media     media          1,   1 2022-03-21 15:19 mem
dr-xr-xr-x  3 root      root                0 2022-03-21 15:19 memcg
crw-------  1 root      root          10,  49 2022-03-21 15:19 memory_bandwidth
crw-rw----  1 root      mtp           10,  43 2022-03-21 15:19 mtp_usb
crw-------  1 root      root          10,  51 2022-03-21 15:19 network_latency
crw-------  1 root      root          10,  50 2022-03-21 15:19 network_throughput
crw-rw-rw-  1 root      root           1,   3 2022-03-21 15:19 null
crw-rw-rw-  1 root      root          10,  62 2022-03-21 15:19 opteearmtz00
c-w--w--w-  1 root      log          253,   0 2022-03-21 15:19 pmsg0
crw-rw----  1 radio     vpn          108,   0 2022-03-21 15:19 ppp
crw-rw-rw-  1 root      root           5,   2 2022-03-21 15:27 ptmx
drwxr-xr-x  2 root      root                0 1970-01-01 08:00 pts
crw-rw-rw-  1 root      root           1,   8 2022-03-21 15:19 random
crw-------  1 root      root          10,  63 2022-03-21 15:19 rfkill
crw-rw-rw-  1 system    system        10,  48 2022-03-21 15:19 rga
crw-r-----  1 system    system       252,   0 2022-03-21 15:19 rtc0
crw-rw----  1 bluetooth net_bt_stack 240,   0 2022-03-21 15:19 rtk_btusb
lrwxrwxrwx  1 root      root               10 2022-03-21 15:19 s3c2410_serial3 -> /dev/ttyS3
drwxr-xr-x  2 root      root              120 2022-03-21 15:19 snd
drwxr-xr-x  2 root      root              400 2022-03-21 15:20 socket
drwxr-xr-x  5 system    system            100 2022-03-21 15:19 stune
crw-rw-rw-  1 system    system        10,  57 2022-03-21 15:19 sw_sync
crw-rw-rw-  1 root      root           5,   0 2022-03-21 15:19 tty
crw-------  1 root      root         254,   0 2022-03-21 15:19 ttyFIQ0
crw-rw----  1 bluetooth net_bt_stack   4,  64 2022-03-21 15:19 ttyS0
crwxrwxrwx  1 root      root           4,  65 2022-03-21 15:19 ttyS1
crwxrwxrwx  1 root      root           4,  66 2022-03-21 15:19 ttyS2
crwxrwxrwx  1 root      root           4,  67 2022-03-21 15:19 ttyS3
crwxrwxrwx  1 root      root           4,  68 2022-03-21 15:19 ttyS4
crw-rw----  1 radio     radio        188,   0 2022-03-21 15:19 ttyUSB0
crw-rw----  1 radio     radio        188,   1 2022-03-21 15:27 ttyUSB1
crw-rw----  1 radio     radio        188,   2 2022-03-21 15:19 ttyUSB2
crw-rw----  1 system    vpn           10, 200 2022-03-21 15:19 tun
crw-rw----  1 system    net_bt_stack  10, 239 2022-03-21 15:19 uhid
crw-rw----  1 system    net_bt_stack  10, 223 2022-03-21 15:19 uinput
crw-rw-rw-  1 root      root           1,   9 2022-03-21 15:19 urandom
drwxr-xr-x  2 root      root              140 2022-03-21 15:19 usb
drwxrwx---  3 shell     shell              60 2022-03-21 15:19 usb-ffs
crw-rw----  1 root      usb           10,  44 2022-03-21 15:19 usb_accessory
crw-rw-r--  1 bluetooth net_bt_stack  10,  45 2022-03-21 15:19 vendor_storage
-rw-------  1 root      root                1 2022-03-21 15:20 video_state
crw-------  1 root      root          10,  54 2022-03-21 15:19 vndbinder
crw-rw-rw-  1 media     media        245,   0 2022-03-21 15:19 vpu-service
crw-r--r--  1 root      root          10,  53 2022-03-21 15:19 xt_qtaguid
crw-rw-rw-  1 root      root           1,   5 2022-03-21 15:19 zero

第五六列代表主设备号和从设备号,设备注册时,必须使用主设备号和次设备号,前者标识这个设备,后者用作本地设备列表中的数组索引,因为同一个驱动程序的一个实例可以处理多个设备,而不同的驱动程序可以处理相同类型的不同设备

设备号的分配和释放

设备号在系统范围内标识设备文件,设备号的分配有两种方法,静态方法(register_chrdev_region)和动态方法(allo_chrdev_region)

设备文件操作

kernel/drivers/tty/tty_io.c


static const struct file_operations tty_fops = {
        .llseek         = no_llseek,
        .read           = tty_read,
        .write          = tty_write,
        .poll           = tty_poll,
        .unlocked_ioctl = tty_ioctl,
        .compat_ioctl   = tty_compat_ioctl,
        .open           = tty_open,
        .release        = tty_release,
        .fasync         = tty_fasync,
};

kernel/drivers/include/linux/fs.h


struct file_operations {
	struct module *owner;
	loff_t (*llseek) (struct file *, loff_t, int);
	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
	int (*iterate) (struct file *, struct dir_context *);
	unsigned int (*poll) (struct file *, struct poll_table_struct *);
	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
	int (*mmap) (struct file *, struct vm_area_struct *);
	int (*open) (struct inode *, struct file *);
	int (*flush) (struct file *, fl_owner_t id);
	int (*release) (struct inode *, struct file *);
	int (*fsync) (struct file *, loff_t, loff_t, int datasync);
	int (*aio_fsync) (struct kiocb *, int datasync);
	int (*fasync) (int, struct file *, int);
	int (*lock) (struct file *, int, struct file_lock *);
	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
	int (*check_flags)(int);
	int (*flock) (struct file *, int, struct file_lock *);
	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
	int (*setlease)(struct file *, long, struct file_lock **, void **);
	long (*fallocate)(struct file *file, int mode, loff_t offset,
			  loff_t len);
	void (*show_fdinfo)(struct seq_file *m, struct file *f);
};

每一个回调函数都和系统调用链接到一起,当用户代码在指定文件上调用与文件相关的系统调用时,内核会查找负责这个文件的驱动程序,定位它的struct file_operations结构,并检查和该系统调用匹配的方法是否已经定义, 如果定义了就运行它; 如果未定义则返回相应的错误码。

内核中的文件表示

内核把文件结构描述为inode结构的实例,定义在kernel/drivers/include/linux/fs.h


struct inode {

	union {
		struct pipe_inode_info	*i_pipe; // linux 内核管道
		struct block_device	*i_bdev; // 块设比
		struct cdev		*i_cdev; // 字符设备
		char			*i_link;
	};

}

struct file {
	union {
		struct llist_node	fu_llist;
		struct rcu_head 	fu_rcuhead;
	} f_u;
	struct path		f_path;                             // 文件路径
	struct inode		*f_inode;	/* cached value */  // 此文件相关的inode
	const struct file_operations	*f_op;                      // 可以在此文件上执行的操作

	/*
	 * Protects f_ep_links, f_flags.
	 * Must not be taken from IRQ context.
	 */
	spinlock_t		f_lock;
	atomic_long_t		f_count;
	unsigned int 		f_flags;
	fmode_t			f_mode;
	struct mutex		f_pos_lock;
	loff_t			f_pos;                             // 此文件中光标的位置
	struct fown_struct	f_owner;
	const struct cred	*f_cred;
	struct file_ra_state	f_ra;

	u64			f_version;
	/* needed for tty driver, and maybe others */
	void			*private_data;                     // 驱动程序可以设置的私有数据,以便在文件操作之间共享


struct inode 是文件系统的数据结构,它只与操作系统有关,用于保存文件和目录,不跟踪文件的当前位置当前模式,它只是帮助操作系统找到底层文件结构的内容。

struct file 是更高级的文件描述,它代表内核中打开的文件,依赖于低层的stuct inode结构

struct inode 代表内核中的文件,struct file描述实际打开的文件。同一个文件打开多次时,可能会有不同的文件描述符,但它们都指向同一个inode

分配和注册字符设备

char_device_driver

字符设备在内核中表示为struct cdev的实例。在编写字符设备驱动程序时,目标是最终创建并注册与struct file_operation关联的结构实例,为用户空间提供一组可以在该设备上执行的操作,为了这个目标需要几个步骤:

  1. 使用alloc_chrdev_region()保留一个主设备号和一定范围的次设备号
  2. 使用class_create()创建自己的设备类
  3. 创建一个struct file_operation(传递给cdev_init), 每一个设备都需要创建,并调用call_init()和cdev_add() 注册这个设备
  4. 调用device_create()创建每个设备,并给它们一个合适的名字,这样就可以在/dev目录下创建设备

串口设备tty

kernel/drivers/tty/tty_io.c


static const struct file_operations tty_fops = {
	.llseek		= no_llseek,
	.read		= tty_read,
	.write		= tty_write,
	.poll		= tty_poll,
	.unlocked_ioctl	= tty_ioctl,
	.compat_ioctl	= tty_compat_ioctl,
	.open		= tty_open,
	.release	= tty_release,
	.fasync		= tty_fasync,
};

/*
 * Called by a tty driver to register itself.
 */
int tty_register_driver(struct tty_driver *driver)
{
	int error;
	int i;
	dev_t dev;
	struct device *d;

	if (!driver->major) {
		error = alloc_chrdev_region(&dev, driver->minor_start,
						driver->num, driver->name);
		if (!error) {
			driver->major = MAJOR(dev);
			driver->minor_start = MINOR(dev);
		}
	} else {
		dev = MKDEV(driver->major, driver->minor_start);
		error = register_chrdev_region(dev, driver->num, driver->name);
	}
	if (error < 0)
		goto err;

	if (driver->flags & TTY_DRIVER_DYNAMIC_ALLOC) {
		error = tty_cdev_add(driver, dev, 0, driver->num);
		if (error)
			goto err_unreg_char;
	}

	mutex_lock(&tty_mutex);
	list_add(&driver->tty_drivers, &tty_drivers);
	mutex_unlock(&tty_mutex);

	if (!(driver->flags & TTY_DRIVER_DYNAMIC_DEV)) {
		for (i = 0; i < driver->num; i++) {
			d = tty_register_device(driver, i, NULL);
			if (IS_ERR(d)) {
				error = PTR_ERR(d);
				goto err_unreg_devs;
			}
		}
	}
	proc_tty_register_driver(driver);
	driver->flags |= TTY_DRIVER_INSTALLED;
	return 0;

err_unreg_devs:
	for (i--; i >= 0; i--)
		tty_unregister_device(driver, i);

	mutex_lock(&tty_mutex);
	list_del(&driver->tty_drivers);
	mutex_unlock(&tty_mutex);

err_unreg_char:
	unregister_chrdev_region(dev, driver->num);
err:
	return error;
}
EXPORT_SYMBOL(tty_register_driver);

// 创建设备类,在sys/class中可见
static int __init tty_class_init(void)
{
	tty_class = class_create(THIS_MODULE, "tty");
	if (IS_ERR(tty_class))
		return PTR_ERR(tty_class);
	tty_class->devnode = tty_devnode;
	return 0;
}

postcore_initcall(tty_class_init);

/*
 * Ok, now we can initialize the rest of the tty devices and can count
 * on memory allocations, interrupts etc..
 */
int __init tty_init(void)
{
        //将file_operation绑定到cdev
	cdev_init(&tty_cdev, &tty_fops);

        //让用户访问设备
	if (cdev_add(&tty_cdev, MKDEV(TTYAUX_MAJOR, 0), 1) ||
	    register_chrdev_region(MKDEV(TTYAUX_MAJOR, 0), 1, "/dev/tty") < 0)
		panic("Couldn't register /dev/tty driver\n");

	device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 0), NULL, "tty");

	cdev_init(&console_cdev, &console_fops);
	if (cdev_add(&console_cdev, MKDEV(TTYAUX_MAJOR, 1), 1) ||
	    register_chrdev_region(MKDEV(TTYAUX_MAJOR, 1), 1, "/dev/console") < 0)
		panic("Couldn't register /dev/console driver\n");
	consdev = device_create_with_groups(tty_class, NULL,
					    MKDEV(TTYAUX_MAJOR, 1), NULL,
					    cons_dev_groups, "console");
	if (IS_ERR(consdev))
		consdev = NULL;

#ifdef CONFIG_VT
	vty_init(&console_fops);
#endif
	return 0;
}

设备中的class节点


rk3288:/ $ ls -al sys/class/tty/
total 0
drwxr-xr-x  2 root root 0 2021-01-01 20:00 .
drwxr-xr-x 74 root root 0 2021-01-01 20:00 ..
lrwxrwxrwx  1 root root 0 2022-03-22 14:51 console -> ../../devices/virtual/tty/console
lrwxrwxrwx  1 root root 0 2022-03-22 14:51 ptmx -> ../../devices/virtual/tty/ptmx
lrwxrwxrwx  1 root root 0 2022-03-22 14:51 tty -> ../../devices/virtual/tty/tty
lrwxrwxrwx  1 root root 0 2022-03-22 14:51 ttyFIQ0 -> ../../devices/platform/fiq_debugger.0/tty/ttyFIQ0
lrwxrwxrwx  1 root root 0 2022-03-22 14:51 ttyS0 -> ../../devices/platform/ff180000.serial/tty/ttyS0
lrwxrwxrwx  1 root root 0 2022-03-22 14:51 ttyS1 -> ../../devices/platform/ff190000.serial/tty/ttyS1
lrwxrwxrwx  1 root root 0 2022-03-22 14:51 ttyS2 -> ../../devices/platform/ff690000.serial/tty/ttyS2
lrwxrwxrwx  1 root root 0 2022-03-22 14:51 ttyS3 -> ../../devices/platform/ff1b0000.serial/tty/ttyS3
lrwxrwxrwx  1 root root 0 2022-03-22 14:51 ttyS4 -> ../../devices/platform/ff1c0000.serial/tty/ttyS4
lrwxrwxrwx  1 root root 0 2022-03-22 14:51 ttyUSB0 -> ../../devices/platform/ff500000.usb/usb2/2-1/2-1:1.2/ttyUSB0/tty/ttyUSB0
lrwxrwxrwx  1 root root 0 2022-03-22 14:51 ttyUSB1 -> ../../devices/platform/ff500000.usb/usb2/2-1/2-1:1.3/ttyUSB1/tty/ttyUSB1

写文件操作

../include/asm-generic/uaccess.h


//*to是内核空间的指针,*from是用户空间指针,n表示从用户空间想内核空间拷贝数据的字节数
static inline long copy_from_user(void *to,
                const void __user * from, unsigned long n)
{
        unsigned long res = n;
        might_fault();
        if (likely(access_ok(VERIFY_READ, from, n)))
                res = __copy_from_user(to, from, n);
        if (unlikely(res))
                memset(to + (n - res), 0, res);
        return res;
}

static inline long copy_to_user(void __user *to,
                const void *from, unsigned long n)
{
        might_fault();
        if (access_ok(VERIFY_WRITE, to, n))
                return __copy_to_user(to, from, n);
        else
                return n;
}

Linux 串口驱动

linux驱动之串口驱动框架

tty_driver

文件操作

open

open方法在每次打开设备文件时被调用,执行open时,将把struct inode作为参数传递给函数,inode是文件在内核的底层表示。struct inode结构内的i_cdev字段指向在init函数中分配出来的cdev.


static int tty_open(struct inode *inode, struct file *filp) {

	dev_t device = inode->i_rdev;

}

release

与open方法相反,release方法在设备关闭时调用,之后撤销在open任务中已经执行的所有操作。

  1. 释放在open()阶段分配的所有私有内存
  2. 关闭设备(如果支持关闭), 并且丢弃上次关闭时的所有缓冲区

int tty_release(struct inode *inode, struct file *filp) {

}

write

write()方法用于向设备发送数据,每当用户调用设备文件上的write函数时,就会调用其内核实现


static ssize_t tty_write(struct file *file, const char __user *buf,
						size_t count, loff_t *ppos)
{
        //------------------------------------------------------
	/* Do the write .. */
	for (;;) {
		size_t size = count;
		if (size > chunk)
			size = chunk;
		ret = -EFAULT;
		if (copy_from_user(tty->write_buf, buf, size))
			break;
		ret = write(tty, file, tty->write_buf, size);
		if (ret <= 0)
			break;
		written += ret;
		buf += ret;
		count -= ret;
		if (!count)
			break;
		ret = -ERESTARTSYS;
		if (signal_pending(current))
			break;
		cond_resched();
	}
        //-------------------------------------------------------
}

  • 返回值是写入的字节数
  • buf表示来自用户空间的数据缓冲区
  • count是请求传输的数据的长度
  • ppos表示数据在文件中应写入的起始位置

read


static ssize_t tty_read(struct file *file, char __user *buf, size_t count,
			loff_t *ppos)
{

    struct inode *inode = file_inode(file);

}

  • buf: 用户空间接收的缓冲区
  • count: 请求传输的数据的大小(用户缓冲区的大小)
  • ppos: 文件中读取数据的起始位置

llseek

在文件中移动光标位置时会使用此方法


loff_t default_llseek(struct file *file, loff_t offset, int whence);

  • offset: 相对于文件当前位置的偏移量
  • whence: 定义从哪里开始查找 SEEK_SET, SEEK_CUR, SEEK_END

poll

如果需要实现被动等待 (在感知字符设备时不浪费CPU周期),则必须实现poll函数,每当用户空间程序与设备关联的文件上执行系统调用select()或者poll()时,都会调用poll函数


static unsigned int tty_poll(struct file *filp, poll_table *wait)

Linux select/poll机制原理分析

I/O多路复用(multiplexing)的本质是通过一种机制(系统内核缓冲I/O数据),让单个进程可以监视多个文件描述符,一旦某个描述符就绪(一般是读就绪或写就绪),能够通知程序进行相应的读写操作

Unix五种IO模型

  1. blocking IO - 阻塞IO
  2. nonblocking IO - 非阻塞IO
  3. IO multiplexing - IO多路复用
  4. signal driven IO - 信号驱动IO
  5. asynchronous IO - 异步IO

其中前面4种IO都可以归类为synchronous IO - 同步IO,而select、poll、epoll本质上也都是同步I/O,因为他们都需要在读写事件就绪后自己负责进行读写,也就是说这个读写过程是阻塞的。

与多进程和多线程技术相比,I/O多路复用技术的最大优势是系统开销小,系统不必创建进程/线程,也不必维护这些进程/线程,从而大大减小了系统的开销。

io_kernel

简单来说,select/poll能监听多个设备的文件描述符,只要有任何一个设备满足条件,select/poll就会返回,否则将进行睡眠等待。看起来,select/poll像是一个管家了,统一负责来监听处理了。

io_kernel_2

select系统调用

syscall_select

kernel/fs/select.c

select系统调用,最终的核心逻辑是在do_select函数中处理的,参考fs/select.c文件;

int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
{
	ktime_t expire, *to = NULL;
	struct poll_wqueues table;
	poll_table *wait;
	int retval, i, timed_out = 0;
	u64 slack = 0;
	unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
	unsigned long busy_end = 0;

	rcu_read_lock();
	retval = max_select_fd(n, fds);
	rcu_read_unlock();

	if (retval < 0)
		return retval;
	n = retval;

	poll_initwait(&table);
	wait = &table.pt;
	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
		wait->_qproc = NULL;
		timed_out = 1;
	}

	if (end_time && !timed_out)
		slack = select_estimate_accuracy(end_time);

	retval = 0;
	for (;;) {
		unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
		bool can_busy_loop = false;

		inp = fds->in; outp = fds->out; exp = fds->ex;
		rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;

		for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
			unsigned long in, out, ex, all_bits, bit = 1, mask, j;
			unsigned long res_in = 0, res_out = 0, res_ex = 0;

			in = *inp++; out = *outp++; ex = *exp++;
			all_bits = in | out | ex;
			if (all_bits == 0) {
				i += BITS_PER_LONG;
				continue;
			}

			for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) {
				struct fd f;
				if (i >= n)
					break;
				if (!(bit & all_bits))
					continue;
				f = fdget(i);
				if (f.file) {
					const struct file_operations *f_op;
					f_op = f.file->f_op;
					mask = DEFAULT_POLLMASK;
					if (f_op->poll) {
						wait_key_set(wait, in, out,
							     bit, busy_flag);
						mask = (*f_op->poll)(f.file, wait);
					}
					fdput(f);
					if ((mask & POLLIN_SET) && (in & bit)) {
						res_in |= bit;
						retval++;
						wait->_qproc = NULL;
					}
					if ((mask & POLLOUT_SET) && (out & bit)) {
						res_out |= bit;
						retval++;
						wait->_qproc = NULL;
					}
					if ((mask & POLLEX_SET) && (ex & bit)) {
						res_ex |= bit;
						retval++;
						wait->_qproc = NULL;
					}
					/* got something, stop busy polling */
					if (retval) {
						can_busy_loop = false;
						busy_flag = 0;

					/*
					 * only remember a returned
					 * POLL_BUSY_LOOP if we asked for it
					 */
					} else if (busy_flag & mask)
						can_busy_loop = true;

				}
			}
			if (res_in)
				*rinp = res_in;
			if (res_out)
				*routp = res_out;
			if (res_ex)
				*rexp = res_ex;
			cond_resched();
		}
		wait->_qproc = NULL;

		if (retval || timed_out || signal_pending(current))
			break;
		if (table.error) {
			retval = table.error;
			break;
		}

		/* only if found POLL_BUSY_LOOP sockets && not out of time */
		if (can_busy_loop && !need_resched()) {
			if (!busy_end) {
				busy_end = busy_loop_end_time();
				continue;
			}
			if (!busy_loop_timeout(busy_end))
				continue;
		}
		busy_flag = 0;

		/*
		 * If this is the first loop and we have a timeout
		 * given, then we convert to ktime_t and set the to
		 * pointer to the expiry value.
		 */
		if (end_time && !to) {
			expire = timespec_to_ktime(*end_time);
			to = &expire;
		}

		if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
					   to, slack))
			timed_out = 1;
	}

	poll_freewait(&table);

	return retval;
}

do_select函数中,有几个关键的操作:

  1. 初始化poll_wqueues结构,包括几个关键函数指针的初始化,用于驱动中进行回调处理;
  2. 循环遍历监测的文件描述符,并且调用f_op->poll()函数,如果有监测条件满足,则会跳出循环;
  3. 在监测的文件描述符都不满足条件时,poll_schedule_timeout让当前进程进行睡眠,超时唤醒,或者被所属的等待队列唤醒;

do_select函数的循环退出条件有三个:

  1. 检测的文件描述符满足条件;
  2. 超时;
  3. 有信号要处理;

在设备驱动程序中实现的poll()函数,会在do_select()中被调用,而驱动中的poll()函数,需要调用poll_wait()函数,poll_wait函数本身很简单,就是去回调函数p->_qproc(),这个回调函数正是poll_initwait()函数中初始化的__pollwait();

select_pollwait

其实就是驱动向select维护的struct poll_wqueue中注册,并将调用select的任务添加到驱动的等待队列中,以便在合适的时机进行唤醒。所以,本质上来说,这是基于等待队列的机制来实现的。

ioctl

ioctl 是设备驱动程序中设备控制接口函数,一个字符设备驱动通常会实现设备打开、关闭、读、写等功能,在一些需要细分的情境下,如果需要扩展新的功能,通常以增设 ioctl() 命令的方式实现。

kernel/drivers/tty/tty_io.c


/*
 * Split this up, as gcc can choke on it otherwise..
 */
long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct tty_struct *tty = file_tty(file);
	struct tty_struct *real_tty;
	void __user *p = (void __user *)arg;
	int retval;
	struct tty_ldisc *ld;

	if (tty_paranoia_check(tty, file_inode(file), "tty_ioctl"))
		return -EINVAL;

	real_tty = tty_pair_get_tty(tty);

	/*
	 * Factor out some common prep work
	 */
	switch (cmd) {
	case TIOCSETD:
	case TIOCSBRK:
	case TIOCCBRK:
	case TCSBRK:
	case TCSBRKP:
		retval = tty_check_change(tty);
		if (retval)
			return retval;
		if (cmd != TIOCCBRK) {
			tty_wait_until_sent(tty, 0);
			if (signal_pending(current))
				return -EINTR;
		}
		break;
	}

	/*
	 *	Now do the stuff.
	 */
	switch (cmd) {
	case TIOCSTI:
		return tiocsti(tty, p);
	case TIOCGWINSZ:
		return tiocgwinsz(real_tty, p);
	case TIOCSWINSZ:
		return tiocswinsz(real_tty, p);
	case TIOCCONS:
		return real_tty != tty ? -EINVAL : tioccons(file);
	case FIONBIO:
		return fionbio(file, p);
	case TIOCEXCL:
		set_bit(TTY_EXCLUSIVE, &tty->flags);
		return 0;
	case TIOCNXCL:
		clear_bit(TTY_EXCLUSIVE, &tty->flags);
		return 0;
	case TIOCGEXCL:
	{
		int excl = test_bit(TTY_EXCLUSIVE, &tty->flags);
		return put_user(excl, (int __user *)p);
	}
	case TIOCNOTTY:
		if (current->signal->tty != tty)
			return -ENOTTY;
		no_tty();
		return 0;
	case TIOCSCTTY:
		return tiocsctty(tty, file, arg);
	case TIOCGPGRP:
		return tiocgpgrp(tty, real_tty, p);
	case TIOCSPGRP:
		return tiocspgrp(tty, real_tty, p);
	case TIOCGSID:
		return tiocgsid(tty, real_tty, p);
	case TIOCGETD:
		return tiocgetd(tty, p);
	case TIOCSETD:
		return tiocsetd(tty, p);
	case TIOCVHANGUP:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		tty_vhangup(tty);
		return 0;
	case TIOCGDEV:
	{
		unsigned int ret = new_encode_dev(tty_devnum(real_tty));
		return put_user(ret, (unsigned int __user *)p);
	}
	/*
	 * Break handling
	 */
	case TIOCSBRK:	/* Turn break on, unconditionally */
		if (tty->ops->break_ctl)
			return tty->ops->break_ctl(tty, -1);
		return 0;
	case TIOCCBRK:	/* Turn break off, unconditionally */
		if (tty->ops->break_ctl)
			return tty->ops->break_ctl(tty, 0);
		return 0;
	case TCSBRK:   /* SVID version: non-zero arg --> no break */
		/* non-zero arg means wait for all output data
		 * to be sent (performed above) but don't send break.
		 * This is used by the tcdrain() termios function.
		 */
		if (!arg)
			return send_break(tty, 250);
		return 0;
	case TCSBRKP:	/* support for POSIX tcsendbreak() */
		return send_break(tty, arg ? arg*100 : 250);

	case TIOCMGET:
		return tty_tiocmget(tty, p);
	case TIOCMSET:
	case TIOCMBIC:
	case TIOCMBIS:
		return tty_tiocmset(tty, cmd, p);
	case TIOCGICOUNT:
		retval = tty_tiocgicount(tty, p);
		/* For the moment allow fall through to the old method */
        	if (retval != -EINVAL)
			return retval;
		break;
	case TCFLSH:
		switch (arg) {
		case TCIFLUSH:
		case TCIOFLUSH:
		/* flush tty buffer and allow ldisc to process ioctl */
			tty_buffer_flush(tty, NULL);
			break;
		}
		break;
	case TIOCSSERIAL:
		tty_warn_deprecated_flags(p);
		break;
	}
	if (tty->ops->ioctl) {
		retval = tty->ops->ioctl(tty, cmd, arg);
		if (retval != -ENOIOCTLCMD)
			return retval;
	}
	ld = tty_ldisc_ref_wait(tty);
	retval = -EINVAL;
	if (ld->ops->ioctl) {
		retval = ld->ops->ioctl(tty, file, cmd, arg);
		if (retval == -ENOIOCTLCMD)
			retval = -ENOTTY;
	}
	tty_ldisc_deref(ld);
	return retval;
}