如何用上Intel 82576中的Allocation of Tx Bandwidth to VMs

发布于 2022-09-30 18:52:20 字数 434 浏览 19 评论 0

最近在看Linux 2.6.32.13中igb的代码,现在有个问题,怎么使用Intel 82576中的Allocation of Tx Bandwidth to VMs功能?
该功能可以对分配给虚拟机的虚拟网卡做带宽分配,有大侠用过吗?
igb驱动怎么改可以用上这个功能呢?以下是在网上搜的:
http://sourceforge.net/mailarchi ... um_name=e1000-devel

上述连接中,相关讨论邮件说的,我做了个实验,都不能用上,甚至VF都没虚出来了,不知哪位大侠知道,讨论讨论?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(3

红颜悴 2022-10-07 18:52:20

[E1000-devel] [rfc 3/3 v3] [rfc 4/4] igb: expose 82576 bandiwidth allocation
From: Simon Horman <horms@ve...> - 2009-11-26 00:42

The 82576 has support for bandwidth allocation to VFs.

Contrary to the documentation in the 82576 datasheet v2.41 this
appears to work as follows:

* The ratio supplied is always proportional to 1Gbit/s,
  regardless of if the link speed.
* The ratio supplied is an upper-bound on bandwidth available
  to the VF, not a minimun guarantee

This patch exposes bandwidth control to userspace through a simple
per-device (PF) sysfs file, bandwidth_allocation.

* The file contains a whitespace delimited list of values, one per VF.
* The first value corresponds to the first VF and so on.
* Valid values are integers from 0 to 1000
* A value of 0 indicates that bandwidth_allocation is disabled.
* Other values indicate the allocated bandwidth, in 1/1000ths of a gigabit/s

e.g. The following for a PF with 4 VFs allocates ~20Mbits/ to VF 1,
     ~100Mbit/s to VF 2, and leave the other 2 VFs with no allocation.

     echo "20 100 0 0" > /sys/class/net/eth3/device/bandwidth_allocation

This interface is intended to allow testing of the hardware feature.
There are ongoing discussions about how to expose this feature
to user-space in a more generic way.

Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Simon Horman <horms@verge.net.au>

---
Thu, 05 Nov 2009 11:58:51 +1100
* Initial post

Wed, 25 Nov 2009 16:58:23 +1100
* Refresh for changes to proceeding patches in series
* Up-port to latest net-next

Index: net-next-2.6/drivers/net/igb/igb_main.c
===================================================================
--- net-next-2.6.orig/drivers/net/igb/igb_main.c        2009-11-26 10:33:01.000000000 +1100
+++ net-next-2.6/drivers/net/igb/igb_main.c        2009-11-26 10:33:01.000000000 +1100
@@ -47,6 +47,9 @@
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
+#ifdef CONFIG_PCI_IOV
+#include <linux/ctype.h>
+#endif
#include "igb.h"

#define DRV_VERSION "2.1.0-k2"
@@ -157,6 +160,15 @@ static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                  "per physical function");
+
+static ssize_t igb_set_bandwidth_allocation(struct device *,
+                                            struct device_attribute *,
+                                            const char *, size_t);
+static ssize_t igb_show_bandwidth_allocation(struct device *,
+                                             struct device_attribute *,
+                                             char *);
+DEVICE_ATTR(bandwidth_allocation, S_IRUGO | S_IWUSR,
+            igb_show_bandwidth_allocation, igb_set_bandwidth_allocation);
#endif /* CONFIG_PCI_IOV */

static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
@@ -1760,6 +1772,19 @@ static void __devinit igb_init_vf(struct
        if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
                goto err_free;

+        if (device_create_file(&pdev->dev, &dev_attr_bandwidth_allocation))
+                goto err_sriov;
+
+        adapter->bandwidth_allocation = kcalloc(adapter->vfs_allocated_count,
+                                                sizeof(unsigned int),
+                                                GFP_KERNEL);
+        if (!adapter->bandwidth_allocation)
+                goto err_file;
+        memset(adapter->bandwidth_allocation,
+               adapter->vfs_allocated_count * sizeof(unsigned int), 0);
+
+        spin_lock_init(&adapter->bandwidth_allocation_lock);
+
        dev_info(&pdev->dev, "%d vfs allocated\n",
                 adapter->vfs_allocated_count);
        for (i = 0; i < adapter->vfs_allocated_count; i++) {
@@ -1768,6 +1793,10 @@ static void __devinit igb_init_vf(struct
        }

        return;
+err_file:
+        device_remove_file(&pdev->dev, &dev_attr_bandwidth_allocation);
+err_sriov:
+        pci_disable_sriov(pdev);
err_free:
        kfree(adapter->vf_data);
err_zero:
@@ -1892,6 +1921,7 @@ static void igb_init_hw_timer(struct igb
static void igb_cleanup_vf(struct igb_adapter * adapter)
{
#ifdef CONFIG_PCI_IOV
+        struct pci_dev *pdev = adapter->pdev;
        struct e1000_hw *hw = &adapter->hw;

        if (!adapter->vf_data)
@@ -1908,6 +1938,9 @@ static void igb_cleanup_vf(struct igb_ad
        wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
        msleep(100);
        dev_info(&adapter->pdev->dev, "IOV Disabled\n");
+
+        device_remove_file(&pdev->dev, &dev_attr_bandwidth_allocation);
+        kfree(adapter->bandwidth_allocation);
#endif
}

@@ -2216,6 +2249,123 @@ void igb_configure_tx_ring(struct igb_ad
        wr32(E1000_TXDCTL(reg_idx), txdctl);
}

+#ifdef CONFIG_PCI_IOV
+static void igb_disable_bandwidth_allocation_vf(struct e1000_hw *hw, int vf)
+{
+        wr32(E1000_VMBASEL, vf);
+        wr32(E1000_VMBAC, 0);
+}
+
+static void igb_disable_bandwidth_allocation(struct igb_adapter *adapter)
+{
+        struct e1000_hw *hw = &adapter->hw;
+        int i;
+
+        for (i = 0; i < adapter->vfs_allocated_count; i++)
+                igb_disable_bandwidth_allocation_vf(hw, i);
+}
+
+static void igb_enable_bandwidth_allocation_vf(struct e1000_hw *hw, int vf,
+                                               unsigned int allocation)
+{
+        u32 rq;
+
+        /* Allocation is expressed as 1000ths of link speed [+]
+         *
+         * rq is calcualted as 1 / (allocation / 1000) = 1000 / allocation
+         *
+         * E1000_VMBAC_RF_INT_SHIFT and E1000_VMBAC_RF_MASK are used
+         * to marshal the result into the desired format: 23 bits of
+         * which 14 are to the right of the decimal point.
+         *
+         * [+] According to the the 82576 v2.41 datasheet rq should
+         *     be a ratio of the link speed, however, empirically
+         *     it appears to always be a ration of to 1Gbit/s,
+         *     even when the link is 100Mbit/s.
+         */
+        rq = ((1000 << E1000_VMBAC_RF_INT_SHIFT) / allocation) &
+             E1000_VMBAC_RF_MASK;
+
+        wr32(E1000_VMBASEL, vf);
+        wr32(E1000_VMBAC, rq|E1000_VMBAC_RC_ENA);
+}
+
+static void igb_enable_bandwidth_allocation(struct igb_adapter *adapter)
+{
+        u32 i, reg;
+        struct e1000_hw *hw = &adapter->hw;
+
+        /* Only enable bandwidth_allocation if it has been set
+         * and the link speed is 100Mbit/s or 1Gbit/s */
+        if (!adapter->bandwidth_allocation ||
+            (adapter->link_speed != SPEED_100 &&
+             adapter->link_speed != SPEED_1000)) {
+                igb_disable_bandwidth_allocation(adapter);
+                return;
+        }
+
+        for (i = 0; i < adapter->vfs_allocated_count; i++) {
+                wr32(E1000_VMBASEL, i);
+                if (adapter->bandwidth_allocation)
+                        igb_enable_bandwidth_allocation_vf(hw, i,
+                                        adapter->bandwidth_allocation);
+                else
+                        igb_disable_bandwidth_allocation_vf(hw, i);
+
+                /* XXX:
+                 *
+                 * The 82576 datasheet, section 4.5.11.1.5.1 "Configuring Tx
+                 * Bandwidth to VMs" states that the desired setting is:
+                 * VMBAMMW.MMW_SIZE = 16 * MSS
+                 *
+                 * But isn't  MSS a property of skbs that are using tso
+                 * rather than adapters?
+                 *
+                 * If so, should we use the maximum value here? */
+                /* XXX: Should this go inside or outside the for loop ? */
+                reg = 64 * 16;
+                wr32(E1000_VMBAMMW, reg);
+        }
+}
+#endif
+
+static void igb_check_bandwidth_allocation(struct igb_adapter *adapter)
+{
+#ifdef CONFIG_PCI_IOV
+        u32 vmbacs;
+        struct e1000_hw *hw = &adapter->hw;
+
+        if (!adapter->vf_data)
+                return;
+
+        /* The 82576 datasheet, section 4.5.11.1.5.2 "Link Speed Change
+         * Procedure" describes the sequence below. However the
+         * SPEED_CHG never seems to be set.
+         */
+        vmbacs = rd32(E1000_VMBACS);
+        if (vmbacs & E1000_VMBACS_SPEED_CHG) {
+                /* XXX: Never seem to get here */
+                int err = 0;
+
+                if (vmbacs & E1000_VMBACS_VMBA_SET) {
+                        igb_disable_bandwidth_allocation(adapter);
+                        err = 1;
+                }
+
+                vmbacs &= ~E1000_VMBACS_SPEED_CHG;
+                wr32(E1000_VMBACS, vmbacs);
+
+                if (err)
+                        return;
+        }
+
+        spin_lock(&adapter->bandwidth_allocation_lock);
+        igb_enable_bandwidth_allocation(adapter);
+        spin_unlock(&adapter->bandwidth_allocation_lock);
+#endif
+        return;
+}
+
/**
  * igb_configure_tx - Configure transmit Unit after Reset
  * @adapter: board private structure
@@ -3100,6 +3250,8 @@ static void igb_watchdog_task(struct wor
                                break;
                        }

+                        igb_check_bandwidth_allocation(adapter);
+
                        netif_carrier_on(netdev);

                        igb_ping_all_vfs(adapter);
@@ -5999,4 +6151,101 @@ static void igb_vmm_control(struct igb_a
        }
}

+#ifdef CONFIG_PCI_IOV
+static ssize_t igb_show_bandwidth_allocation(struct device *dev,
+                                             struct device_attribute *attr,
+                                             char *buf)
+{
+        struct net_device *netdev = dev_get_drvdata(dev);
+        struct igb_adapter *adapter = netdev_priv(netdev);
+        int i;
+
+        if (!adapter->vf_data)
+                return -ENOENT;
+
+        *buf = '\0';
+        for (i = 0; i < adapter->vfs_allocated_count; i++) {
+                if (i > 0)
+                        strcat(buf, " ");
+                sprintf(buf + strlen(buf), "%i",
+                        adapter->bandwidth_allocation);
+        }
+        strcat(buf, "\n");
+
+        return strlen(buf);
+}
+
+static unsigned long igb_strtoul(const char *cp, char **endp, unsigned int base)
+{
+        const char *orig = cp;
+        unsigned long x;
+
+        while (isspace(*cp))
+                cp++;
+
+        x = simple_strtoul(cp, endp, base);
+        if (cp == *endp)
+                *endp = (char *)orig;
+
+        return x;
+}
+
+static ssize_t igb_set_bandwidth_allocation(struct device *dev,
+                                            struct device_attribute *attr,
+                                            const char *buf, size_t count)
+{
+        struct net_device *netdev = dev_get_drvdata(dev);
+        struct igb_adapter *adapter = netdev_priv(netdev);
+        int i;
+        size_t len;
+        ssize_t status = -ENOENT;
+        unsigned int *new, total;
+        unsigned long x;
+        const char *p;
+        char *next_p;
+
+        if (!adapter->vf_data)
+                return -ENOENT;
+
+        len = adapter->vfs_allocated_count * sizeof(unsigned int);
+
+        new = kmalloc(len, GFP_KERNEL);
+        if (!new)
+                return -ENOMEM;
+
+        p = buf;
+        total = 0;
+        for (i = 0; i < adapter->vfs_allocated_count; i++) {
+                x = igb_strtoul(p, &next_p, 10);
+                if (p == next_p) {
+                        dev_err(dev, "not enough values\n");
+                        goto err;
+                }
+                if (x > 1000) {
+                        dev_err(dev, "value is too large\n");
+                        goto err;
+                }
+                new = x;
+                total += x;
+                p = next_p;
+        }
+
+        /* Check for trailing rubbish */
+        igb_strtoul(p, &next_p, 10);
+        if (p != next_p) {
+                dev_err(dev, "trailing rubbish\n");
+                goto err;
+        }
+
+        spin_lock(&adapter->bandwidth_allocation_lock);
+        memcpy(adapter->bandwidth_allocation, new, len);
+        igb_enable_bandwidth_allocation(adapter);
+        spin_unlock(&adapter->bandwidth_allocation_lock);
+
+        status = count;
+err:
+        kfree(new);
+        return status;
+}
+#endif /* CONFIG_PCI_IOV */
/* igb_main.c */
Index: net-next-2.6/drivers/net/igb/e1000_regs.h
===================================================================
--- net-next-2.6.orig/drivers/net/igb/e1000_regs.h        2009-11-26 10:32:02.000000000 +1100
+++ net-next-2.6/drivers/net/igb/e1000_regs.h        2009-11-26 10:33:01.000000000 +1100
@@ -311,6 +311,16 @@
#define E1000_VLVF(_n)         (0x05D00 + (4 * (_n))) /* VLAN Virtual Machine
                                                        * Filter - RW */

+/* Tx Bandwidth Allocation to VM Registers */
+#define E1000_VMBACS        0x03600 /* VM Bandwidth Allocation
+                                 * Control & Status - RW */
+#define E1000_VMBAMMW        0x03670 /* VM Bandwidth Allocation
+                                 * Max Memory Window - RW */
+#define E1000_VMBASEL        0x03604 /* VM Bandwidth Allocation
+                                 * Select - RW */
+#define E1000_VMBAC        0x03608 /* VM Bandwidth Allocation
+                                 * Config - RW */
+
#define wr32(reg, value) (writel(value, hw->hw_addr + reg))
#define rd32(reg) (readl(hw->hw_addr + reg))
#define wrfl() ((void)rd32(E1000_STATUS))
Index: net-next-2.6/drivers/net/igb/e1000_defines.h
===================================================================
--- net-next-2.6.orig/drivers/net/igb/e1000_defines.h        2009-11-26 10:32:02.000000000 +1100
+++ net-next-2.6/drivers/net/igb/e1000_defines.h        2009-11-26 10:33:01.000000000 +1100
@@ -724,4 +724,13 @@
#define E1000_PCIEMISC_LX_DECISION      0x00000080 /* Lx power decision based
                                                       on DMA coal */

+/* VM Bandwidth Allocation Control & Status */
+#define E1000_VMBACS_VMBA_SET                0x00001000
+#define E1000_VMBACS_SPEED_CHG                0x80000000
+
+/* VM Bandwidth Allocation Config */
+#define E1000_VMBAC_RF_INT_SHIFT        14
+#define E1000_VMBAC_RF_MASK                ((1<<23)-1)        /* RF_DEC and RF_INT */
+#define E1000_VMBAC_RC_ENA                0x80000000
+
#endif
Index: net-next-2.6/drivers/net/igb/igb.h
===================================================================
--- net-next-2.6.orig/drivers/net/igb/igb.h        2009-11-26 10:32:02.000000000 +1100
+++ net-next-2.6/drivers/net/igb/igb.h        2009-11-26 10:33:01.000000000 +1100
@@ -312,6 +312,10 @@ struct igb_adapter {
        unsigned int vfs_allocated_count;
        struct vf_data_storage *vf_data;
        u32 rss_queues;
+#ifdef CONFIG_PCI_IOV
+        unsigned int *bandwidth_allocation;
+        spinlock_t bandwidth_allocation_lock;
+#endif
};

#define IGB_FLAG_HAS_MSI           (1 << 0)

極樂鬼 2022-10-07 18:52:20

分析上述代码,实际上主要的思想是,在igb_probe中在/sys目录下创建一个可读写的文件,
以该文件进行用户态与内核态之间信息的交互,向该文件中写入要设置的分配给VF的带宽大小。
然后驱动程序读取该文件中的值,设置82576相对应的寄存器,即可。

独行侠 2022-10-07 18:52:20

本帖最后由 wangpeng168 于 2011-03-29 17:31 编辑

不过上述代码参考的是Intel 82576的Datasheet版本为2.41,我看的为2.61的,2.61规范上说的设置寄存器步骤与实现的代码好像有点不一样,有看过Intel 82576的Datasheet版本为2.61的大侠吗?但是根据2.61的也做了实验,VF是正常出现了,但是分配给虚拟机时,虚拟机发现不了VF。

另外我读取 VMBACS (0x3600; RW)时,
vmbacs = rd32(E1000_VMBACS);
printk(KERN_INFO "TEST: 0x%08x\n", vmbacs);

打印出来的值一直是0x00c00804
初始的时候2.6文档说应该是:0x00400804 这个啊,不知道为什么?文档说保留位21~23必须设置为010b的啊?

有知道的么?跟这个关系大么?

另外,我设置wr32(E1000_VMBACS, 0x0F470804);
开启带宽分配功能,执行到igb_probe的末尾又变成0x00c00804

所有操作是在igb_probe函数的如下宏定义

#ifdef CONFIG_PCI_IOV
...
#end

中添加的

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文