aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--tools/numap/README.org48
-rw-r--r--tools/numap/go.mod3
-rw-r--r--tools/numap/internal/hwids/hwids.go148
-rw-r--r--tools/numap/internal/sysfs/parse.go21
-rw-r--r--tools/numap/internal/sysfs/pci.go145
-rw-r--r--tools/numap/numa.go116
-rw-r--r--tools/numap/numap.go31
7 files changed, 512 insertions, 0 deletions
diff --git a/tools/numap/README.org b/tools/numap/README.org
new file mode 100644
index 0000000..5781030
--- /dev/null
+++ b/tools/numap/README.org
@@ -0,0 +1,48 @@
+#+TITLE: numap
+
+Print the NUMA topology of a host.
+
+* Usage
+#+BEGIN_SRC sh
+./numap |jq .
+{
+ "node0": {
+ "name": "node0",
+ "path": "/sys/devices/system/node/node0",
+ "cpulist": "0-19,40-59",
+ "pci_devices": [
+ {
+ "vendor": "Mellanox Technologies",
+ "name": "MT27710 Family [ConnectX-4 Lx]"
+ },
+ {
+ "vendor": "Mellanox Technologies",
+ "name": "MT27710 Family [ConnectX-4 Lx]"
+ }
+ ]
+ },
+ "node1": {
+ "name": "node1",
+ "path": "/sys/devices/system/node/node1",
+ "cpulist": "20-39,60-79",
+ "pci_devices": [
+ {
+ "vendor": "Intel Corporation",
+ "name": "NVMe Datacenter SSD [3DNAND, Beta Rock Controller]"
+ }
+ ]
+ }
+}
+#+END_SRC
+
+The command scans the host for its NUMA nodes and PCI devices, then maps each PCI device back to the NUMA node it is attached to.
+
+It also reports the list of CPUs attached to each node.
+
+* Limitations
+** Device class
+For now, only the following classes of hardware are reported:
+- NVMe
+- network
+- GPU
+
diff --git a/tools/numap/go.mod b/tools/numap/go.mod
new file mode 100644
index 0000000..92b1885
--- /dev/null
+++ b/tools/numap/go.mod
@@ -0,0 +1,3 @@
+module golang.fcuny.net/numap
+
+go 1.17
diff --git a/tools/numap/internal/hwids/hwids.go b/tools/numap/internal/hwids/hwids.go
new file mode 100644
index 0000000..6aa9d8a
--- /dev/null
+++ b/tools/numap/internal/hwids/hwids.go
@@ -0,0 +1,148 @@
+package hwids
+
+import (
+ "bufio"
+ "fmt"
+ "os"
+ "strings"
+)
+
+// pciPath lists the default locations that Load probes, in order, for
+// the pci.ids hardware database.
+var pciPath = []string{
+ "/usr/share/hwdata/pci.ids",
+ "/usr/share/misc/pci.ids",
+}
+
+// PCIType identifies which kind of pci.ids line a PciDevice entry was
+// built from.
+type PCIType int
+
+const (
+ // PCIVendor marks an entry built from a vendor line (no leading tab).
+ PCIVendor PCIType = iota
+ // PCIDevice marks an entry built from a device line (one leading tab).
+ PCIDevice
+ // PCISubsystem marks an entry built from a subsystem line (two
+ // leading tabs).
+ PCISubsystem
+)
+
+// PciDevices indexes the entries parsed from pci.ids by vendor ID.
+type PciDevices map[uint16][]PciDevice
+
+// PciDevice represents one entry of the pci.ids database. Type tells
+// which kind of line produced the entry; the parser also copies the
+// enclosing vendor (and device, for subsystem lines) into the entry, and
+// leaves the fields of narrower levels at their zero value.
+type PciDevice struct {
+ // Type is the kind of line the entry was built from.
+ Type PCIType
+ // Vendor and Device are the numeric vendor and device IDs.
+ Vendor, Device uint16
+ // SubVendor and SubDevice are the numeric subsystem IDs.
+ SubVendor, SubDevice uint16
+ // VendorName and DeviceName are the names read next to the IDs.
+ VendorName, DeviceName string
+ // SubName is the name read on a subsystem line.
+ SubName string
+}
+
+// Load reads the PCI hardware database (pci.ids) and returns a map of
+// vendor ID -> list of entries known for that vendor.
+//
+// The default locations listed in pciPath are tried first, in order.
+// When the HWDATA environment variable is set, its value is tried last
+// as an extra location. Files that cannot be opened are skipped and the
+// first one that opens is parsed. An error is returned when no candidate
+// can be opened, or when the selected file cannot be parsed.
+func Load() (PciDevices, error) {
+	// Work on a private copy: appending to the package-level slice
+	// would make it grow by one entry on every call.
+	candidates := make([]string, 0, len(pciPath)+1)
+	candidates = append(candidates, pciPath...)
+	if extraPath := os.Getenv("HWDATA"); extraPath != "" {
+		candidates = append(candidates, extraPath)
+	}
+
+	for _, f := range candidates {
+		fh, err := os.Open(f)
+		if err != nil {
+			continue
+		}
+		// The loop is left right after this point, so the deferred
+		// close cannot pile up across iterations.
+		defer fh.Close()
+		return parse(fh)
+	}
+	return PciDevices{}, fmt.Errorf("hwids: could not find a pci.ids file")
+}
+
+// parse reads a pci.ids database from f and returns its entries indexed
+// by vendor ID.
+//
+// The format of the file is as follows (IDs and name are separated by
+// TWO spaces):
+//
+//	vendor<2 spaces>vendor_name
+//	<tab>device<2 spaces>device_name
+//	<tab><tab>subvendor subdevice<2 spaces>subsystem_name
+//
+// Every vendor, device and subsystem line produces one entry, stored
+// under the ID of the vendor it belongs to. Parsing stops at the first
+// line starting with 'C' (the class definitions that follow the device
+// list). A nil map and an error are returned when a line is malformed or
+// the file cannot be read.
+func parse(f *os.File) (PciDevices, error) {
+	// idSep is the separator between the ID column and the name: two
+	// spaces. Splitting on a single space would cut a subsystem line
+	// between its two IDs and leave a leading space in every name.
+	const idSep = "  "
+
+	devices := make(PciDevices)
+
+	s := bufio.NewScanner(f)
+
+	// cur carries the vendor / device context from one line to the
+	// next, so that device and subsystem entries know their parents.
+	cur := PciDevice{}
+
+	for s.Scan() {
+		l := s.Text()
+		// skip empty lines or lines that are a comment
+		if len(l) == 0 || l[0] == '#' {
+			continue
+		}
+		// lines starting with a C are the classes definitions, and
+		// they are at the end of the file, which means we're done
+		// parsing the devices
+		if l[0] == 'C' {
+			break
+		}
+
+		parts := strings.SplitN(l, idSep, 2)
+		if len(parts) != 2 {
+			return nil, fmt.Errorf("hwids: malformed PCI ID line (missing ID separator): %s", l)
+		}
+
+		ids, name := parts[0], parts[1]
+		if len(ids) < 2 || len(name) == 0 {
+			return nil, fmt.Errorf("hwids: malformed PCI ID line (empty ID or name): %s", l)
+		}
+
+		// The number of leading tabs gives the level of the entry.
+		switch {
+		case strings.HasPrefix(ids, "\t\t"):
+			cur.Type = PCISubsystem
+		case ids[0] == '\t':
+			cur.Type = PCIDevice
+		default:
+			cur.Type = PCIVendor
+		}
+
+		// Sscanf skips the leading tabs before reading the hex IDs.
+		var err error
+		switch cur.Type {
+		case PCIVendor:
+			_, err = fmt.Sscanf(ids, "%x", &cur.Vendor)
+			cur.VendorName = name
+		case PCIDevice:
+			_, err = fmt.Sscanf(ids, "%x", &cur.Device)
+			cur.DeviceName = name
+		case PCISubsystem:
+			_, err = fmt.Sscanf(ids, "%x %x", &cur.SubVendor, &cur.SubDevice)
+			cur.SubName = name
+		}
+
+		if err != nil {
+			return nil, fmt.Errorf("hwids: malformed PCI ID line: %s: %w", l, err)
+		}
+
+		// Reset the narrower levels when moving to a different vendor
+		// or device, so stale values do not leak into the new entry.
+		switch cur.Type {
+		case PCIVendor:
+			cur.Device = 0
+			cur.DeviceName = ""
+			fallthrough
+		case PCIDevice:
+			cur.SubVendor = 0
+			cur.SubDevice = 0
+			cur.SubName = ""
+		}
+
+		// append on a missing key starts from a nil slice, so no
+		// existence check is needed.
+		devices[cur.Vendor] = append(devices[cur.Vendor], cur)
+	}
+
+	if err := s.Err(); err != nil {
+		return nil, fmt.Errorf("hwids: failed to read PCI ID line: %w", err)
+	}
+
+	return devices, nil
+}
diff --git a/tools/numap/internal/sysfs/parse.go b/tools/numap/internal/sysfs/parse.go
new file mode 100644
index 0000000..d518653
--- /dev/null
+++ b/tools/numap/internal/sysfs/parse.go
@@ -0,0 +1,21 @@
+package sysfs
+
+import (
+ "io/ioutil"
+ "strconv"
+ "strings"
+)
+
+// ContentUint64 reads the file at path, strips the surrounding
+// whitespace from its content and parses the result as an unsigned
+// 64-bit integer. The base is inferred from the prefix of the value
+// (for instance "0x" for hexadecimal), as strconv.ParseUint does for
+// base 0. On any failure the returned value is 0.
+func ContentUint64(path string) (uint64, error) {
+	raw, err := ioutil.ReadFile(path)
+	if err != nil {
+		return 0, err
+	}
+
+	text := strings.TrimSpace(string(raw))
+	value, err := strconv.ParseUint(text, 0, 64)
+	if err != nil {
+		// ParseUint may return a non-zero value along with the error;
+		// callers always get 0 in that case.
+		return 0, err
+	}
+	return value, nil
+}
diff --git a/tools/numap/internal/sysfs/pci.go b/tools/numap/internal/sysfs/pci.go
new file mode 100644
index 0000000..9e714b1
--- /dev/null
+++ b/tools/numap/internal/sysfs/pci.go
@@ -0,0 +1,145 @@
+package sysfs
+
+import (
+ "fmt"
+ "io/ioutil"
+ "path"
+ "path/filepath"
+ "strconv"
+ "strings"
+)
+
+const (
+ // sysFsPCIDevicesPath is the sysfs directory listed by
+ // ScanPCIDevices; each of its entries is handed to NewPCIDevice.
+ sysFsPCIDevicesPath = "/sys/bus/pci/devices/"
+)
+
+// PCIDevice holds the attributes NewPCIDevice reads from the sysfs
+// directory of one PCI device.
+type PCIDevice struct {
+ // NumaNode is the integer read from the "numa_node" file.
+ NumaNode int
+ // ID is the name given to NewPCIDevice (ScanPCIDevices passes the
+ // name of the device's directory entry).
+ ID string
+ // Device and Vendor are read from the "device" and "vendor" files.
+ Device, Vendor uint64
+ // SubVendor and SubDevice are read from the "subsystem_vendor" and
+ // "subsystem_device" files.
+ SubVendor, SubDevice uint64
+ // Class is read from the "class" file.
+ Class uint64
+ // MSIs lists the entries of "msi_irqs" whose content is "msix",
+ // each converted from its file name to an integer.
+ MSIs []int
+}
+
+// ScanPCIDevices builds a PCIDevice for every entry found under
+// sysFsPCIDevicesPath and returns them in directory order. It panics
+// when the directory cannot be listed or when a device cannot be read.
+func ScanPCIDevices() []PCIDevice {
+	entries, err := ioutil.ReadDir(sysFsPCIDevicesPath)
+	if err != nil {
+		panic(err)
+	}
+
+	found := []PCIDevice{}
+	for i := range entries {
+		name := entries[i].Name()
+		dev, err := NewPCIDevice(filepath.Join(sysFsPCIDevicesPath, name), name)
+		if err != nil {
+			panic(err)
+		}
+		found = append(found, dev)
+	}
+	return found
+}
+
+// getPCIDeviceClass returns the number stored in the "class" file of
+// the device directory at path.
+func getPCIDeviceClass(path string) (uint64, error) {
+	attr := filepath.Join(path, "class")
+	return ContentUint64(attr)
+}
+
+// getPCIDeviceVendor returns the number stored in the "vendor" file of
+// the device directory at path.
+func getPCIDeviceVendor(path string) (uint64, error) {
+	attr := filepath.Join(path, "vendor")
+	return ContentUint64(attr)
+}
+
+// getPCIDeviceId returns the number stored in the "device" file of the
+// device directory at path.
+func getPCIDeviceId(path string) (uint64, error) {
+	attr := filepath.Join(path, "device")
+	return ContentUint64(attr)
+}
+
+// getPCIDeviceSubsystemDevice returns the number stored in the
+// "subsystem_device" file of the device directory at path.
+func getPCIDeviceSubsystemDevice(path string) (uint64, error) {
+	attr := filepath.Join(path, "subsystem_device")
+	return ContentUint64(attr)
+}
+
+// getPCIDeviceSubsystemVendor returns the number stored in the
+// "subsystem_vendor" file of the device directory at path.
+func getPCIDeviceSubsystemVendor(path string) (uint64, error) {
+	attr := filepath.Join(path, "subsystem_vendor")
+	return ContentUint64(attr)
+}
+
+// getPCIDeviceNumaNode returns the integer stored in the "numa_node"
+// file of the device directory at path. It panics when the file cannot
+// be read or does not hold an integer.
+func getPCIDeviceNumaNode(path string) int {
+	raw, readErr := ioutil.ReadFile(filepath.Join(path, "numa_node"))
+	if readErr != nil {
+		panic(readErr)
+	}
+
+	text := strings.TrimSpace(string(raw))
+	node, convErr := strconv.Atoi(text)
+	if convErr != nil {
+		panic(convErr)
+	}
+	return node
+}
+
+// getPCIDeviceMSIx lists the entries of the "msi_irqs" directory of the
+// device directory p whose content is "msix", and returns their file
+// names converted to integers. The result is an empty, non-nil slice
+// when nothing matches. It panics when the glob pattern is invalid,
+// when an entry cannot be read, or when the name of an "msix" entry is
+// not an integer.
+func getPCIDeviceMSIx(p string) []int {
+	pattern := fmt.Sprintf("%s/*", filepath.Join(p, "msi_irqs"))
+	entries, err := filepath.Glob(pattern)
+	if err != nil {
+		panic(err)
+	}
+
+	irqs := []int{}
+	for _, entry := range entries {
+		mode, err := ioutil.ReadFile(entry)
+		if err != nil {
+			panic(err)
+		}
+		// Only the entries flagged "msix" are of interest.
+		if strings.TrimSpace(string(mode)) != "msix" {
+			continue
+		}
+
+		irq, err := strconv.Atoi(path.Base(entry))
+		if err != nil {
+			panic(err)
+		}
+		irqs = append(irqs, irq)
+	}
+	return irqs
+}
+
+// NewPCIDevice reads the attributes of the PCI device whose sysfs
+// directory is path and returns them as a PCIDevice identified by name.
+//
+// A zero PCIDevice and an error are returned when the device, vendor,
+// subsystem or class attribute cannot be read. The NUMA node and MSI-X
+// lookups do not return errors; they panic on failure.
+func NewPCIDevice(path, name string) (PCIDevice, error) {
+	dev := PCIDevice{ID: name}
+	dev.NumaNode = getPCIDeviceNumaNode(path)
+
+	var err error
+	if dev.Device, err = getPCIDeviceId(path); err != nil {
+		return PCIDevice{}, err
+	}
+	if dev.Vendor, err = getPCIDeviceVendor(path); err != nil {
+		return PCIDevice{}, err
+	}
+	if dev.SubVendor, err = getPCIDeviceSubsystemVendor(path); err != nil {
+		return PCIDevice{}, err
+	}
+	if dev.SubDevice, err = getPCIDeviceSubsystemDevice(path); err != nil {
+		return PCIDevice{}, err
+	}
+	if dev.Class, err = getPCIDeviceClass(path); err != nil {
+		return PCIDevice{}, err
+	}
+
+	dev.MSIs = getPCIDeviceMSIx(path)
+	return dev, nil
+}
diff --git a/tools/numap/numa.go b/tools/numap/numa.go
new file mode 100644
index 0000000..402ea1d
--- /dev/null
+++ b/tools/numap/numa.go
@@ -0,0 +1,116 @@
+package main
+
+import (
+ "fmt"
+ "io/ioutil"
+ "path"
+ "path/filepath"
+ "strings"
+
+ "golang.fcuny.net/numap/internal/hwids"
+ "golang.fcuny.net/numap/internal/sysfs"
+)
+
+const (
+	// node_root is the glob pattern used to find the sysfs directory of
+	// every NUMA node.
+	node_root = "/sys/devices/system/node/node*"
+
+	// Values of a device's sysfs "class" attribute for the kinds of
+	// hardware that get reported. Written in hex, the bytes read as
+	// class / subclass / programming interface.
+	CLASS_NVMe     = 0x010802 // 67586
+	CLASS_ETHERNET = 0x020000 // 131072
+	CLASS_GPU      = 0x030200 // 197120
+)
+
+// node describes one NUMA node as it is emitted in the JSON output.
+type node struct {
+ // Name is the last element of the node's sysfs path.
+ Name string `json:"name"`
+ // Path is the sysfs directory of the node.
+ Path string `json:"path"`
+ // CpuList is the content of the node's "cpulist" file, without
+ // trailing newlines.
+ CpuList string `json:"cpulist"`
+ // PCIDevices lists the reported PCI devices mapped to this node.
+ PCIDevices []PCIDevice `json:"pci_devices"`
+}
+
+// PCIDevice is the human-readable description of a PCI device emitted
+// in the JSON output; both names come from the hardware database.
+type PCIDevice struct {
+ // Vendor is the vendor name.
+ Vendor string `json:"vendor"`
+ // Name is the device name.
+ Name string `json:"name"`
+}
+
+// findNodes discovers the NUMA nodes of the host and attaches to each
+// of them the PCI devices that are mapped to it.
+//
+// hwdb is the hardware database used to resolve vendor and device
+// names. The result maps a node name ("node0", "node1", ...) to its
+// description. A nil map and an error are returned when no node can be
+// found, when a node cannot be read, or when the PCI devices cannot be
+// mapped.
+func findNodes(hwdb hwids.PciDevices) (map[string]node, error) {
+	files, err := filepath.Glob(node_root)
+	if err != nil {
+		return nil, fmt.Errorf("Failed to find NUMA nodes under %s: %+v", node_root, err)
+	}
+	if len(files) == 0 {
+		return nil, fmt.Errorf("Could not find NUMA node in %s", node_root)
+	}
+
+	nodes := make(map[string]node, len(files))
+	for _, f := range files {
+		n, err := newNode(f)
+		if err != nil {
+			return nil, err
+		}
+		nodes[n.Name] = n
+	}
+
+	// Report the failure to the caller like every other error of this
+	// function instead of panicking.
+	byNode, err := mapPCIDevicesToNumaNode(hwdb)
+	if err != nil {
+		return nil, err
+	}
+	for k, v := range byNode {
+		nodeName := fmt.Sprintf("node%d", k)
+		n, ok := nodes[nodeName]
+		if !ok {
+			// The device reports a node that was not discovered above
+			// (for instance a numa_node of -1). Assigning it anyway
+			// would create a phantom entry such as "node-1" with an
+			// empty name, path and cpulist.
+			continue
+		}
+		n.PCIDevices = v
+		nodes[nodeName] = n
+	}
+	return nodes, nil
+}
+
+// mapPCIDevicesToNumaNode scans the PCI devices of the host and groups
+// the ones of a reported class (NVMe, ethernet, GPU) by the NUMA node
+// they declare.
+//
+// hwdb is used to resolve the vendor and device names. Devices whose
+// vendor is absent from hwdb are left out. When the vendor is known but
+// the device ID is not, the device is still reported with its vendor
+// name and an empty device name. The returned error is currently always
+// nil.
+func mapPCIDevicesToNumaNode(hwdb hwids.PciDevices) (map[int][]PCIDevice, error) {
+	r := map[int][]PCIDevice{}
+
+	for _, d := range sysfs.ScanPCIDevices() {
+		switch d.Class {
+		case CLASS_NVMe, CLASS_ETHERNET, CLASS_GPU:
+			// reported class, keep going
+		default:
+			continue
+		}
+
+		desc, ok := hwdb[uint16(d.Vendor)]
+		if !ok {
+			continue
+		}
+
+		var vendor, name string
+		for i := range desc {
+			m := &desc[i]
+			// The key was truncated to 16 bits for the lookup; make
+			// sure the entry really belongs to this vendor.
+			if uint64(m.Vendor) != d.Vendor {
+				continue
+			}
+			if uint64(m.Device) == d.Device {
+				vendor, name = m.VendorName, m.DeviceName
+				break
+			}
+			// Remember the vendor name so it is not lost when the
+			// device ID turns out to be missing from the database.
+			if vendor == "" {
+				vendor = m.VendorName
+			}
+		}
+
+		r[d.NumaNode] = append(r[d.NumaNode], PCIDevice{
+			Vendor: vendor,
+			Name:   name,
+		})
+	}
+	return r, nil
+}
+
+// newNode describes the NUMA node whose sysfs directory is p. The name
+// of the node is the last element of p and its PCI device list starts
+// empty. An error is returned when the CPU list of the node cannot be
+// read.
+func newNode(p string) (node, error) {
+	cpus, err := cpuList(p)
+	if err != nil {
+		return node{}, err
+	}
+
+	_, base := path.Split(p)
+	n := node{
+		Name:       base,
+		Path:       p,
+		CpuList:    cpus,
+		PCIDevices: []PCIDevice{},
+	}
+	return n, nil
+}
+
+// cpuList returns the content of the "cpulist" file found in the
+// directory p, with trailing newlines removed. An error is returned
+// when the file cannot be read.
+func cpuList(p string) (string, error) {
+	lpath := filepath.Join(p, "cpulist")
+
+	raw, err := ioutil.ReadFile(lpath)
+	if err != nil {
+		return "", fmt.Errorf("Failed to open %s: %+v", lpath, err)
+	}
+
+	cpus := strings.TrimRight(string(raw), "\n")
+	return cpus, nil
+}
diff --git a/tools/numap/numap.go b/tools/numap/numap.go
new file mode 100644
index 0000000..c65f1f0
--- /dev/null
+++ b/tools/numap/numap.go
@@ -0,0 +1,31 @@
+package main
+
+import (
+ "encoding/json"
+ "fmt"
+ "os"
+
+ "golang.fcuny.net/numap/internal/hwids"
+)
+
+// main loads the hardware database, discovers the NUMA topology of the
+// host and prints it as a JSON document on the standard output. Any
+// failure is reported on the standard error and ends the program with
+// the exit status 1.
+func main() {
+	hwdb, err := hwids.Load()
+	if err != nil {
+		// Errors go to stderr: stdout is meant to be piped into a JSON
+		// consumer such as jq and must only ever carry the document.
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+
+	nodes, err := findNodes(hwdb)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+
+	out, err := json.Marshal(nodes)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+
+	fmt.Println(string(out))
+}