x86 pci bus 初始化流程

    技术2023-11-12  108

    X86上pcie的初始化枚举流程,基于kernel 5.2.9分析

    `只是一个看代码笔记,仅供参考`

    pcie的代码在kernel里面大致分两部分初始化: 一部分和cpu的架构密切相关,使用arch_initcall初始化,x86的初始化代码位于arch/x86/pci/init.c,入口是arch_initcall(pci_arch_init); 主要是检测pci type,设置全局的config空间read/write函数。另一部分是pcie subsystem的初始化,使用subsys_initcall初始化,x86的代码位于arch/x86/pci/legacy.c里面,前面是和x86相关的处理,最终会调用pci_scan_root_bus来扫描初始化总线下所有的bridge和ep设备。

    A. arch_initcall(pci_arch_init)的初始化:

    探测bus总线,设置read,write函数

    函数pci_arch_init:
    /* arch_initcall has too random ordering, so call the initializers in the right sequence from here. */ static __init int pci_arch_init(void) { #ifdef CONFIG_PCI_DIRECT int type = 0; type = pci_direct_probe(); #endif if (!(pci_probe & PCI_PROBE_NOEARLY)) pci_mmcfg_early_init(); if (x86_init.pci.arch_init && !x86_init.pci.arch_init()) return 0; #ifdef CONFIG_PCI_BIOS pci_pcbios_init(); #endif /* * don't check for raw_pci_ops here because we want pcbios as last * fallback, yet it's needed to run first to set pcibios_last_bus * in case legacy PCI probing is used. otherwise detecting peer busses * fails. */ #ifdef CONFIG_PCI_DIRECT pci_direct_init(type); / 下面有具体的函数 / #endif if (!raw_pci_ops && !raw_pci_ext_ops) printk(KERN_ERR "PCI: Fatal: No config space access function found\n"); dmi_check_pciprobe(); dmi_check_skip_isa_align(); return 0; } arch_initcall(pci_arch_init);
    函数pci_direct_init:
    /*
     * Install the raw config-space accessors for the access type found by
     * the probe.
     *
     * @type: 0 = nothing usable (no-op), 1 = configuration type 1, any other
     *        value selects pci_direct_conf2.
     *
     * For type 1 the same ops are also used for extended access, but only
     * when no extended ops are registered yet and PCI_HAS_IO_ECS is set in
     * pci_probe.
     */
    void __init pci_direct_init(int type)
    {
        if (type == 0)
            return;
        printk(KERN_INFO "PCI: Using configuration type %d for base access\n",
               type);
        if (type == 1) {
            raw_pci_ops = &pci_direct_conf1;
            if (raw_pci_ext_ops)
                return;
            if (!(pci_probe & PCI_HAS_IO_ECS))
                return;
            printk(KERN_INFO "PCI: Using configuration type 1 "
                   "for extended access\n");
            /* pci_direct_conf1 and its callbacks are listed below */
            raw_pci_ext_ops = &pci_direct_conf1;
            return;
        }
        raw_pci_ops = &pci_direct_conf2;
    }
    pci_direct_conf1 结构体定义及回调函数
    const struct pci_raw_ops pci_direct_conf1 = { .read = pci_conf1_read, .write = pci_conf1_write, }; static int pci_conf1_read(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 *value) { unsigned long flags; if (seg || (bus > 255) || (devfn > 255) || (reg > 4095)) { *value = -1; return -EINVAL; } raw_spin_lock_irqsave(&pci_config_lock, flags); /* * #define PCI_CONF1_ADDRESS(bus, devfn, reg) \ * (0x80000000 | ((reg & 0xF00) << 16) | (bus << 16) \ * | (devfn << 8) | (reg & 0xFC)) */ outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); switch (len) { case 1: *value = inb(0xCFC + (reg & 3)); break; case 2: *value = inw(0xCFC + (reg & 2)); break; case 4: *value = inl(0xCFC); break; } raw_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } static int pci_conf1_write(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 value) { unsigned long flags; if (seg || (bus > 255) || (devfn > 255) || (reg > 4095)) return -EINVAL; raw_spin_lock_irqsave(&pci_config_lock, flags); outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); switch (len) { case 1: outb((u8)value, 0xCFC + (reg & 3)); break; case 2: outw((u16)value, 0xCFC + (reg & 2)); break; case 4: outl((u32)value, 0xCFC); break; } raw_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; }

    B. pcie read/write config 函数的注册流程:

    pci_bus_read_config_xx and pci_bus_write_config_xx:
    #define PCI_OP_READ(size, type, len) \ int noinline pci_bus_read_config_##size \ (struct pci_bus *bus, unsigned int devfn, int pos, type *value) \ { \ int res; \ unsigned long flags; \ u32 data = 0; \ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ pci_lock_config(flags); \ res = bus->ops->read(bus, devfn, pos, len, &data); \ *value = (type)data; \ pci_unlock_config(flags); \ return res; \ } #define PCI_OP_WRITE(size, type, len) \ int noinline pci_bus_write_config_##size \ (struct pci_bus *bus, unsigned int devfn, int pos, type value) \ { \ int res; \ unsigned long flags; \ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ pci_lock_config(flags); \ res = bus->ops->write(bus, devfn, pos, len, value); \ pci_unlock_config(flags); \ return res; \ } PCI_OP_READ(byte, u8, 1) PCI_OP_READ(word, u16, 2) PCI_OP_READ(dword, u32, 4) PCI_OP_WRITE(byte, u8, 1) PCI_OP_WRITE(word, u16, 2) PCI_OP_WRITE(dword, u32, 4) EXPORT_SYMBOL(pci_bus_read_config_byte); EXPORT_SYMBOL(pci_bus_read_config_word); EXPORT_SYMBOL(pci_bus_read_config_dword); EXPORT_SYMBOL(pci_bus_write_config_byte); EXPORT_SYMBOL(pci_bus_write_config_word); EXPORT_SYMBOL(pci_bus_write_config_dword);

    C. pcie subsystem的初始化流程

    1.下面是pcie scan and add device的大概函数调用流程

    subsys_initcall(pci_subsys_init);
    pci_subsys_init()
        pci_legacy_init()
            pcibios_scan_root(0);
                pci_scan_root_bus();
                    pci_create_root_bus(parent, bus, ops, sysdata, resources);
                        pci_alloc_host_bridge(0); //分配buffer
                        pci_register_host_bridge(bridge); //注册bridge的设备
                    pci_scan_child_bus(b);
                        pci_scan_child_bus_extend(bus, 0); /* Scan devices below a bus,子函数太多,另起一行 */

    pci_scan_child_bus_extend(bus, 0); //Scan devices below a bus
        for (devfn = 0; devfn < 256; devfn += 8) {
            pci_scan_slot(bus, devfn);
                pci_scan_single_device(bus, devfn);
                    pci_get_slot(bus, devfn);
                    pci_scan_device(bus, devfn); /* 很重要,下面有详细讲解 */
                        pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000)
                            pci_bus_generic_read_dev_vendor_id(bus, devfn, l, timeout);
                                pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l)
                        pci_alloc_dev(bus);
                        pci_setup_device(dev);
                            set_pcie_port_type(dev); //读配置空间,通过capability获取信息
                            pci_dev_assign_slot(dev);
                            dev_set_name();
                            /* 接下来的代码是根据header type来设置或者获取pci信息 */
                    pci_device_add(dev, bus); /* 很重要,下面有详细讲解,根据pci的信息,配置pci设备,最后device_add(&dev->dev); */
        }
        pci_iov_bus_range(bus); /* find bus range used by Virtual Function,Reserve buses for SR-IOV capability */
        for_each_pci_bridge(dev, bus) { //Scan bridges that are already configured. We don't touch them unless they are misconfigured
            pci_scan_bridge_extend(bus, dev, max, 0, 0);
                pci_find_bus(pci_domain_nr(bus), secondary);
                pci_scan_child_bus(child); /* 重新执行一遍上面的初始化扫描过程,一级一级递归扫描设备直到最后 */
        }
        for_each_pci_bridge(dev, bus) /* Scan bridges that need to be reconfigured */
            pci_scan_bridge_extend(bus, dev, cmax, buses, 1);

    2.下面是一些重要函数的详细代码

    2.1 pci_subsys_init:

    /*
     * subsys_initcall entry point of the x86 PCI subsystem setup.
     *
     * Returns -ENODEV when the legacy path is requested and reports that PCI
     * is unavailable; otherwise runs the peer-bridge fixup, the platform IRQ
     * init and pcibios_init(), and returns 0.
     */
    static int __init pci_subsys_init(void)
    {
        /*
         * The init function returns a non-zero value when
         * pci_legacy_init should be invoked.
         */
        if (x86_init.pci.init()) {
            if (pci_legacy_init()) {
                pr_info("PCI: System does not support PCI\n");
                return -ENODEV;
            }
        }

        pcibios_fixup_peer_bridges();
        x86_init.pci.init_irq();
        pcibios_init();

        return 0;
    }
    subsys_initcall(pci_subsys_init);

    2.2 pci_legacy_init:

    /*
     * Legacy probing path: scan root bus 0 through the raw config accessors.
     *
     * Returns 1 when no raw_pci_ops accessor is installed (the caller treats
     * this as "no PCI"), 0 after the scan has been started.
     */
    int __init pci_legacy_init(void)
    {
        if (!raw_pci_ops)
            return 1;

        pr_info("PCI: Probing PCI hardware\n");
        pcibios_scan_root(0);
        return 0;
    }

    2.3 pcibios_scan_root:

    void pcibios_scan_root(int busnum) { struct pci_bus *bus; struct pci_sysdata *sd; LIST_HEAD(resources); sd = kzalloc(sizeof(*sd), GFP_KERNEL); if (!sd) { printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busnum); return; } sd->node = x86_pci_root_bus_node(busnum);//获取当前bus所在的NUMA node 号 x86_pci_root_bus_resources(busnum, &resources); printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, sd, &resources);// 探测当前总线设备以及子总线、子总线设备 if (!bus) { pci_free_resource_list(&resources); kfree(sd); return; } pci_bus_add_devices(bus);// 全部设备探测完毕,注册设备。 }

    3. 关键函数的讲解:

    3.00 pci_scan_device流程:

    Read the config data for a PCI device, sanity-check it, and fill in the dev structure.

    pci_scan_device();
        pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000);
            pci_bus_generic_read_dev_vendor_id(bus, devfn, l, timeout);
                pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l)
        pci_alloc_dev(bus);
            dev = kzalloc(sizeof(struct pci_dev), GFP_KERNEL);
            INIT_LIST_HEAD(&dev->bus_list);
            dev->dev.type = &pci_dev_type;
            dev->bus = pci_bus_get(bus);
        pci_setup_device(dev);
            pci_hdr_type(dev);
            set_pcie_port_type(dev);
            pci_dev_assign_slot(dev); //通过devfn来计算出一个slot号,保存到dev中
                list_for_each_entry(slot, &dev->bus->slots, list)
                    if (PCI_SLOT(dev->devfn) == slot->number)
                        dev->slot = slot;
            dev_set_name();
            set_pcie_thunderbolt(dev);
            set_pcie_untrusted(dev);
            pci_fixup_device(pci_fixup_early, dev); /* Early fixups, before probing the BARs */
            pci_intx_mask_broken(dev); /* test whether PCI_COMMAND_INTX_DISABLE is writable */
            case PCI_HEADER_TYPE_NORMAL: /* standard header */
                pci_read_irq(dev); /* Read interrupt line and base address registers. */
                pci_read_bases(dev, 6, PCI_ROM_ADDRESS); /* get BARs info */
                pci_subsystem_ids(dev, &dev->subsystem_vendor, &dev->subsystem_device);
            case PCI_HEADER_TYPE_BRIDGE:
                pci_read_irq(dev);
                pci_read_bases(dev, 2, PCI_ROM_ADDRESS1);
                pci_read_bridge_windows(dev); /* get IO and MEMORY windows size */
                set_pcie_hotplug_bridge(dev); /* check whether support hotplug */
                pci_read_config_word(dev, pos + PCI_SSVID_VENDOR_ID, &dev->subsystem_vendor);
                pci_read_config_word(dev, pos + PCI_SSVID_DEVICE_ID, &dev->subsystem_device);
            case PCI_HEADER_TYPE_CARDBUS: /* CardBus bridge header */
                pci_read_irq(dev);
                pci_read_bases(dev, 1, 0);
                pci_read_config_word(dev, PCI_CB_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
                pci_read_config_word(dev, PCI_CB_SUBSYSTEM_ID, &dev->subsystem_device);
    3.01 pci_setup_device 具体函数内容

    Initialize the device structure with information about the device's vendor,class,memory and IO-space addresses, IRQ lines etc.

    /**
     * pci_setup_device - Fill in class and map information of a device
     * @dev: the device structure to fill
     *
     * Initialize the device structure with information about the device's
     * vendor,class,memory and IO-space addresses, IRQ lines etc.
     * Called at initialisation of the PCI subsystem and by CardBus services.
     * Returns 0 on success and negative if unknown type of device (not normal,
     * bridge or CardBus).
     *
     * Note: a class that does not match the header type (the "bad" label)
     * only resets dev->class to PCI_CLASS_NOT_DEFINED and still returns 0;
     * -EIO is returned solely for an unknown header type.
     */
    int pci_setup_device(struct pci_dev *dev)
    {
        u32 class;
        u16 cmd;
        u8 hdr_type;
        int pos = 0;
        struct pci_bus_region region;
        struct resource *res;

        hdr_type = pci_hdr_type(dev);

        dev->sysdata = dev->bus->sysdata;
        dev->dev.parent = dev->bus->bridge;
        dev->dev.bus = &pci_bus_type;
        /* Low 7 bits: header layout; bit 7: multi-function flag. */
        dev->hdr_type = hdr_type & 0x7f;
        dev->multifunction = !!(hdr_type & 0x80);
        dev->error_state = pci_channel_io_normal;
        set_pcie_port_type(dev);

        pci_dev_assign_slot(dev);

        /*
         * Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
         * set this higher, assuming the system even supports it.
         */
        dev->dma_mask = 0xffffffff;

        dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus),
                     dev->bus->number, PCI_SLOT(dev->devfn),
                     PCI_FUNC(dev->devfn));

        class = pci_class(dev);

        dev->revision = class & 0xff;
        dev->class = class >> 8;		    /* upper 3 bytes */

        pci_info(dev, "[%04x:%04x] type %02x class %#08x\n",
                 dev->vendor, dev->device, dev->hdr_type, dev->class);

        if (pci_early_dump)
            early_dump_pci_device(dev);

        /* Need to have dev->class ready */
        dev->cfg_size = pci_cfg_space_size(dev);

        /* Need to have dev->cfg_size ready */
        set_pcie_thunderbolt(dev);

        set_pcie_untrusted(dev);

        /* "Unknown power state" */
        dev->current_state = PCI_UNKNOWN;

        /* Early fixups, before probing the BARs */
        pci_fixup_device(pci_fixup_early, dev);

        /* Device class may be changed after fixup */
        class = dev->class >> 8;

        if (dev->non_compliant_bars) {
            pci_read_config_word(dev, PCI_COMMAND, &cmd);
            if (cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
                pci_info(dev, "device has non-compliant BARs; disabling IO/MEM decoding\n");
                cmd &= ~PCI_COMMAND_IO;
                cmd &= ~PCI_COMMAND_MEMORY;
                pci_write_config_word(dev, PCI_COMMAND, cmd);
            }
        }

        dev->broken_intx_masking = pci_intx_mask_broken(dev);

        switch (dev->hdr_type) {		    /* header type */
        case PCI_HEADER_TYPE_NORMAL:		    /* standard header */
            if (class == PCI_CLASS_BRIDGE_PCI)
                goto bad;
            pci_read_irq(dev);
            pci_read_bases(dev, 6, PCI_ROM_ADDRESS);

            pci_subsystem_ids(dev, &dev->subsystem_vendor, &dev->subsystem_device);

            /*
             * Do the ugly legacy mode stuff here rather than broken chip
             * quirk code. Legacy mode ATA controllers have fixed
             * addresses. These are not always echoed in BAR0-3, and
             * BAR0-3 in a few cases contain junk!
             */
            if (class == PCI_CLASS_STORAGE_IDE) {
                u8 progif;
                pci_read_config_byte(dev, PCI_CLASS_PROG, &progif);
                /* progif bit 0 clear: resources 0/1 get the fixed 0x1F0 / 0x3F6 ports */
                if ((progif & 1) == 0) {
                    region.start = 0x1F0;
                    region.end = 0x1F7;
                    res = &dev->resource[0];
                    res->flags = LEGACY_IO_RESOURCE;
                    pcibios_bus_to_resource(dev->bus, res, &region);
                    pci_info(dev, "legacy IDE quirk: reg 0x10: %pR\n",
                             res);
                    region.start = 0x3F6;
                    region.end = 0x3F6;
                    res = &dev->resource[1];
                    res->flags = LEGACY_IO_RESOURCE;
                    pcibios_bus_to_resource(dev->bus, res, &region);
                    pci_info(dev, "legacy IDE quirk: reg 0x14: %pR\n",
                             res);
                }
                /* progif bit 2 clear: resources 2/3 get the fixed 0x170 / 0x376 ports */
                if ((progif & 4) == 0) {
                    region.start = 0x170;
                    region.end = 0x177;
                    res = &dev->resource[2];
                    res->flags = LEGACY_IO_RESOURCE;
                    pcibios_bus_to_resource(dev->bus, res, &region);
                    pci_info(dev, "legacy IDE quirk: reg 0x18: %pR\n",
                             res);
                    region.start = 0x376;
                    region.end = 0x376;
                    res = &dev->resource[3];
                    res->flags = LEGACY_IO_RESOURCE;
                    pcibios_bus_to_resource(dev->bus, res, &region);
                    pci_info(dev, "legacy IDE quirk: reg 0x1c: %pR\n",
                             res);
                }
            }
            break;

        case PCI_HEADER_TYPE_BRIDGE:		    /* bridge header */
            /*
             * The PCI-to-PCI bridge spec requires that subtractive
             * decoding (i.e. transparent) bridge must have programming
             * interface code of 0x01.
             */
            pci_read_irq(dev);
            dev->transparent = ((dev->class & 0xff) == 1);
            pci_read_bases(dev, 2, PCI_ROM_ADDRESS1);
            pci_read_bridge_windows(dev);
            set_pcie_hotplug_bridge(dev);
            /* Subsystem IDs are read only if an SSVID capability is found. */
            pos = pci_find_capability(dev, PCI_CAP_ID_SSVID);
            if (pos) {
                pci_read_config_word(dev, pos + PCI_SSVID_VENDOR_ID, &dev->subsystem_vendor);
                pci_read_config_word(dev, pos + PCI_SSVID_DEVICE_ID, &dev->subsystem_device);
            }
            break;

        case PCI_HEADER_TYPE_CARDBUS:		    /* CardBus bridge header */
            if (class != PCI_CLASS_BRIDGE_CARDBUS)
                goto bad;
            pci_read_irq(dev);
            pci_read_bases(dev, 1, 0);
            pci_read_config_word(dev, PCI_CB_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
            pci_read_config_word(dev, PCI_CB_SUBSYSTEM_ID, &dev->subsystem_device);
            break;

        default:				    /* unknown header */
            pci_err(dev, "unknown header type %02x, ignoring device\n",
                    dev->hdr_type);
            return -EIO;

        bad:
            pci_err(dev, "ignoring class %#08x (doesn't match header type %02x)\n",
                    dev->class, dev->hdr_type);
            dev->class = PCI_CLASS_NOT_DEFINED << 8;
        }

        /* We found a fine healthy device, go go go... */
        return 0;
    }
    3.02 pci_read_bases 函数内容:
    static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) { unsigned int pos, reg; if (dev->non_compliant_bars) return; /* Per PCIe r4.0, sec 9.3.4.1.11, the VF BARs are all RO Zero */ if (dev->is_virtfn) return; /采用循环的方式调用__pci_read_base,第一个bar的地址是PCI_BASE_ADDRESS_0(0x10)/ for (pos = 0; pos < howmany; pos++) { struct resource *res = &dev->resource[pos]; reg = PCI_BASE_ADDRESS_0 + (pos << 2); pos += __pci_read_base(dev, pci_bar_unknown, res, reg); //分配资源的具体函数 } if (rom) { struct resource *res = &dev->resource[PCI_ROM_RESOURCE]; dev->rom_base_reg = rom; res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_READONLY | IORESOURCE_SIZEALIGN; __pci_read_base(dev, pci_bar_mem32, res, rom); } }
    3.03 __pci_read_base :
    /** * pci_read_base - Read a PCI BAR * @dev: the PCI device * @type: type of the BAR * @res: resource buffer to be filled in * @pos: BAR position in the config space * * Returns 1 if the BAR is 64-bit, or 0 if 32-bit. */ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, struct resource *res, unsigned int pos) { u32 l = 0, sz = 0, mask; u64 l64, sz64, mask64; u16 orig_cmd; struct pci_bus_region region, inverted_region; mask = type ? PCI_ROM_ADDRESS_MASK : ~0;/ 判断是device还是bridge / /* No printks while decoding is disabled! */ if (!dev->mmio_always_on) { pci_read_config_word(dev, PCI_COMMAND, &orig_cmd); if (orig_cmd & PCI_COMMAND_DECODE_ENABLE) { pci_write_config_word(dev, PCI_COMMAND, orig_cmd & ~PCI_COMMAND_DECODE_ENABLE); } } res->name = pci_name(dev); `获取bar空间大小,x86上,bar的地址已经在bios里面设置到寄存器里面了,读出来的就是bar的首地址,` `全部写1来确定size, 读取resource的flag` pci_read_config_dword(dev, pos, &l); pci_write_config_dword(dev, pos, l | mask); pci_read_config_dword(dev, pos, &sz); pci_write_config_dword(dev, pos, l); /* * All bits set in sz means the device isn't working properly. * If the BAR isn't implemented, all bits must be 0. If it's a * memory BAR or a ROM, bit 0 must be clear; if it's an io BAR, bit * 1 must be clear. */ if (sz == 0xffffffff) sz = 0; /* * I don't know how l can have all bits set. Copied from old code. * Maybe it fixes a bug on some ancient platform. 
*/ if (l == 0xffffffff) l = 0; if (type == pci_bar_unknown) { res->flags = decode_bar(dev, l); res->flags |= IORESOURCE_SIZEALIGN; /根据flag判断是io还是mem/ if (res->flags & IORESOURCE_IO) { l64 = l & PCI_BASE_ADDRESS_IO_MASK; sz64 = sz & PCI_BASE_ADDRESS_IO_MASK; mask64 = PCI_BASE_ADDRESS_IO_MASK & (u32)IO_SPACE_LIMIT; } else { l64 = l & PCI_BASE_ADDRESS_MEM_MASK; sz64 = sz & PCI_BASE_ADDRESS_MEM_MASK; mask64 = (u32)PCI_BASE_ADDRESS_MEM_MASK; } } else { if (l & PCI_ROM_ADDRESS_ENABLE) res->flags |= IORESOURCE_ROM_ENABLE; l64 = l & PCI_ROM_ADDRESS_MASK; sz64 = sz & PCI_ROM_ADDRESS_MASK; mask64 = PCI_ROM_ADDRESS_MASK; } /如果是64 bit的话,就要继续读取高32bit/ if (res->flags & IORESOURCE_MEM_64) { pci_read_config_dword(dev, pos + 4, &l); pci_write_config_dword(dev, pos + 4, ~0); pci_read_config_dword(dev, pos + 4, &sz); pci_write_config_dword(dev, pos + 4, l); l64 |= ((u64)l << 32); sz64 |= ((u64)sz << 32); mask64 |= ((u64)~0 << 32); } if (!dev->mmio_always_on && (orig_cmd & PCI_COMMAND_DECODE_ENABLE)) pci_write_config_word(dev, PCI_COMMAND, orig_cmd); if (!sz64) goto fail; sz64 = pci_size(l64, sz64, mask64);/获取size大小,对bar空间全部写一,回读,最低位为1的地方就是size/ if (!sz64) { pci_info(dev, FW_BUG "reg 0x%x: invalid BAR (can't size)\n", pos); goto fail; } /合法性检测/ if (res->flags & IORESOURCE_MEM_64) { if ((sizeof(pci_bus_addr_t) < 8 || sizeof(resource_size_t) < 8) && sz64 > 0x100000000ULL) { res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED; res->start = 0; res->end = 0; pci_err(dev, "reg 0x%x: can't handle BAR larger than 4GB (size %#010llx)\n", pos, (unsigned long long)sz64); goto out; } if ((sizeof(pci_bus_addr_t) < 8) && l) { /* Above 32-bit boundary; try to reallocate */ res->flags |= IORESOURCE_UNSET; res->start = 0; res->end = sz64 - 1; pci_info(dev, "reg 0x%x: can't handle BAR above 4GB (bus address %#010llx)\n", pos, (unsigned long long)l64); goto out; } } region.start = l64; region.end = l64 + sz64 - 1; pcibios_bus_to_resource(dev->bus, res, &region); /下面有具体函数/ 
pcibios_resource_to_bus(dev->bus, &inverted_region, res); /* * If "A" is a BAR value (a bus address), "bus_to_resource(A)" is * the corresponding resource address (the physical address used by * the CPU. Converting that resource address back to a bus address * should yield the original BAR value: * * resource_to_bus(bus_to_resource(A)) == A * * If it doesn't, CPU accesses to "bus_to_resource(A)" will not * be claimed by the device. */ if (inverted_region.start != region.start) { res->flags |= IORESOURCE_UNSET; res->start = 0; res->end = region.end - region.start; pci_info(dev, "reg 0x%x: initial BAR value %#010llx invalid\n", pos, (unsigned long long)region.start); } goto out; fail: res->flags = 0; out: if (res->flags) pci_info(dev, "reg 0x%x: %pR\n", pos, res); return (res->flags & IORESOURCE_MEM_64) ? 1 : 0; }
    总线地址与CPU资源地址的相互转换(__pci_read_base里借此检查BAR地址的合法性):pcibios_resource_to_bus 和 pcibios_bus_to_resource
    /* True iff r1 completely contains r2 */ static inline bool resource_contains(struct resource *r1, struct resource *r2) { if (resource_type(r1) != resource_type(r2)) return false; if (r1->flags & IORESOURCE_UNSET || r2->flags & IORESOURCE_UNSET) return false; return r1->start <= r2->start && r1->end >= r2->end; } void pcibios_resource_to_bus(struct pci_bus *bus, struct pci_bus_region *region, struct resource *res) { struct pci_host_bridge *bridge = pci_find_host_bridge(bus); struct resource_entry *window; resource_size_t offset = 0; resource_list_for_each_entry(window, &bridge->windows) { if (resource_contains(window->res, res)) { offset = window->offset; break; } } region->start = res->start - offset; region->end = res->end - offset; } EXPORT_SYMBOL(pcibios_resource_to_bus); static bool region_contains(struct pci_bus_region *region1, struct pci_bus_region *region2) { return region1->start <= region2->start && region1->end >= region2->end; } void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res, struct pci_bus_region *region) { struct pci_host_bridge *bridge = pci_find_host_bridge(bus); struct resource_entry *window; resource_size_t offset = 0; resource_list_for_each_entry(window, &bridge->windows) { struct pci_bus_region bus_region; /遍历brideg的地址范围,check类型是否一致:io or mem/ if (resource_type(res) != resource_type(window->res)) continue; bus_region.start = window->res->start - window->offset; bus_region.end = window->res->end - window->offset; /判断当前的region是否和已有的region是否有重叠/ if (region_contains(&bus_region, region)) { offset = window->offset; break; } } / 更新 res = &dev->resource[pos];/ res->start = region->start + offset; res->end = region->end + offset; }
    3.10 pci_device_add(dev, bus) 流程:
    pci_device_add(dev, bus);
        pci_configure_device(dev);
            pci_configure_mps(dev); /* 设置payload size, 以下4个都是通过pcie capability structure(id:10)来配置device */
            pci_configure_extended_tags(dev, NULL); /* config extended tags */
            pci_configure_relaxed_ordering(dev); /* config relaxed ordering */
            pci_configure_ltr(dev);
            pci_configure_eetlp_prefix(dev);
            pci_configure_serr(dev); /* 和bridge的SERR#有关 */
            pci_acpi_program_hp_params(dev, &hp_ops);
        device_initialize(&dev->dev); /* init device structure. */
        dev->dev.release = pci_release_dev; /* 设置release函数 */
        set_dev_node(&dev->dev, pcibus_to_node(bus)); //设置numa_node
        dma_set_max_seg_size(&dev->dev, 65536);
        dma_set_seg_boundary(&dev->dev, 0xffffffff);
        pci_fixup_device(pci_fixup_header, dev);
        /*
         * This function disables memory decoding and releases memory resources
         * of the device specified by kernel's boot parameter 'pci=resource_alignment='
         * It also rounds up size to specified alignment.
         * Later on, the kernel will assign page-aligned memory resource back
         * to the device.
         */
        pci_reassigndev_resource_alignment(dev);
        pci_init_capabilities(dev); /* Initialize various capabilities 比较重要,下面附详细函数 */
        list_add_tail(&dev->bus_list, &bus->devices); /* Add the device to our list of discovered devices and the bus list for fixup functions */
        pcibios_add_device(dev);
        pci_set_msi_domain(dev); /* Set up MSI IRQ domain */
        device_add(&dev->dev); /* add device to device hierarchy. */
    3.11pci_init_capabilities 具体函数内容

    进一步地根据capability进行初始化,包括sr-iov。下面的函数都是通过函数pci_find_capability()来查找capability ID地址的:从64字节的标准配置空间头里面获取第一个capability的地址(Capabilities Pointer),然后逐个遍历查找,找到对应的ID,获取地址。

    static void pci_init_capabilities(struct pci_dev *dev) { /* Enhanced Allocation */ pci_ea_init(dev); /* Setup MSI caps & disable MSI/MSI-X interrupts */ pci_msi_setup_pci_dev(dev); /* Buffers for saving PCIe and PCI-X capabilities */ pci_allocate_cap_save_buffers(dev); /* Power Management */ pci_pm_init(dev); /* Vital Product Data */ pci_vpd_init(dev); /* Alternative Routing-ID Forwarding */ pci_configure_ari(dev); /* Single Root I/O Virtualization */ pci_iov_init(dev); / initialize the IOV capability,获取sriov信息,计算最大的busnum, 获取VF 的total值,获取每个bar空间的size,分配资源池size=total*bar_size, 后续vf分配的bar资源就是从这里来的, 计算最大的bus号,为后面的vf使用。 / /* Address Translation Services */ pci_ats_init(dev); /* Enable ACS P2P upstream forwarding */ pci_enable_acs(dev); /* Precision Time Measurement */ pci_ptm_init(dev); /* Advanced Error Reporting */ pci_aer_init(dev); pcie_report_downtraining(dev); if (pci_probe_reset_function(dev) == 0)/check whether the device can be safely reset/ dev->reset_fn = 1; }
    Processed: 0.009, SQL: 9