Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 65 additions & 11 deletions lisa/sut_orchestrator/libvirt/libvirt_device_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,24 +186,78 @@ def create_device_pool(
vendor_id=vendor_id,
device_id=device_id,
)
primary_nic_iommu = self.get_primary_nic_id()
for item in device_list:
device = DeviceAddressSchema()
domain, bus, slot, fn = self._parse_pci_address_str(addr=item.slot)
device.domain = domain
device.bus = bus
device.slot = slot
device.function = fn
bdf_list = [i.slot for i in device_list]
self._create_pool(pool_type, bdf_list)

def create_device_pool_from_pci_addresses(
self,
pool_type: HostDevicePoolType,
pci_addr_list: List[str],
) -> None:
self.available_host_devices[pool_type] = {}
for bdf in pci_addr_list:
domain, bus, slot, fn = self._parse_pci_address_str(bdf)
device = self._get_pci_address_instance(domain, bus, slot, fn)
iommu_group = self._get_device_iommu_group(device)
is_vfio_pci = self._is_driver_vfio_pci(device)

if not is_vfio_pci and iommu_group not in primary_nic_iommu:
# Get all the devices of that iommu group
iommu_path = f"/sys/kernel/iommu_groups/{iommu_group}/devices"
bdf_list = [i.strip() for i in self.host_node.tools[Ls].list(iommu_path)]
bdf_list.append(bdf.strip()) # append the given device in list

self._create_pool(pool_type, bdf_list)

def _create_pool(
self,
pool_type: HostDevicePoolType,
bdf_list: List[str],
) -> None:
iommu_grp_of_used_devices = []
primary_nic_iommu = self.get_primary_nic_id()
for bdf in bdf_list:
domain, bus, slot, fn = self._parse_pci_address_str(bdf)
dev = self._get_pci_address_instance(domain, bus, slot, fn)
Copy link
Contributor

@gamora12 gamora12 Mar 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: Rename dev to device for uniform naming scheme.

is_vfio_pci = self._is_driver_vfio_pci(dev)
iommu_group = self._get_device_iommu_group(dev)

if iommu_group in iommu_grp_of_used_devices:
# No need to add this device in pool as one of the devices for this
# iommu group is in use
continue

if is_vfio_pci:
# Do not consider any device for pool if any device of same iommu group
# is already assigned
pool = self.available_host_devices.get(pool_type, {})
pool.pop(iommu_group, [])
self.available_host_devices[pool_type] = pool
iommu_grp_of_used_devices.append(iommu_group)
elif (
iommu_group not in primary_nic_iommu
and iommu_group not in iommu_grp_of_used_devices
):
pool = self.available_host_devices.get(pool_type, {})
devices = pool.get(iommu_group, [])
devices.append(device)
if dev not in devices:
devices.append(dev)
pool[iommu_group] = devices
self.available_host_devices[pool_type] = pool

def _get_pci_address_instance(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be a constructor in DeviceAddressSchema.

Also, DeviceAddressSchema can be renamed to just DeviceAddress

self,
domain: str,
bus: str,
slot: str,
fn: str,
) -> DeviceAddressSchema:
device = DeviceAddressSchema()
device.domain = domain
device.bus = bus
device.slot = slot
device.function = fn

return device

def _add_device_passthrough_xml(
self,
devices: ET.Element,
Expand Down
12 changes: 11 additions & 1 deletion lisa/sut_orchestrator/libvirt/schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Optional, Union
from typing import Any, List, Optional, Union

from dataclasses_json import dataclass_json

Expand Down Expand Up @@ -48,6 +48,16 @@ class DeviceAddressSchema:
slot: str = ""
function: str = ""

def __eq__(self, other: Any) -> bool:
if isinstance(other, DeviceAddressSchema):
return (
self.domain == other.domain
and self.bus == other.bus
and self.slot == other.slot
and self.function == other.function
)
return False


# QEMU orchestrator's global configuration options.
@dataclass_json()
Expand Down
65 changes: 47 additions & 18 deletions lisa/sut_orchestrator/util/device_pool.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from typing import Any, List, Optional
from typing import Any, List, Optional, cast

from lisa.sut_orchestrator.util.schema import HostDevicePoolSchema, HostDevicePoolType
from lisa.sut_orchestrator.util.schema import (
HostDevicePoolSchema,
HostDevicePoolType,
VendorDeviceIdIdentifier,
)
from lisa.util import LisaException


Expand All @@ -16,6 +20,13 @@ def create_device_pool(
) -> None:
raise NotImplementedError()

def create_device_pool_from_pci_addresses(
self,
pool_type: HostDevicePoolType,
pci_addr_list: List[str],
) -> None:
raise NotImplementedError()

def get_primary_nic_id(self) -> List[str]:
raise NotImplementedError()

Expand Down Expand Up @@ -44,22 +55,40 @@ def configure_device_passthrough_pool(
f"Pool type '{pool_type}' is not supported by platform"
)
for config in device_configs:
vendor_device_list = config.devices
if len(vendor_device_list) > 1:
raise LisaException(
"Device Pool does not support more than one "
"vendor/device id list for given pool type"
)
devices = config.devices
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can check for the existence of device_configs early and return immediately if it doesn't exist to avoid nesting.

if isinstance(devices, list) and all(
isinstance(d, VendorDeviceIdIdentifier) for d in devices
):
if len(devices) > 1:
raise LisaException(
"Device Pool does not support more than one "
"vendor/device id list for given pool type"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we support one set of vendor id + device id or one list?

)

vendor_device_id = devices[0]
assert vendor_device_id.vendor_id.strip()
vendor_id = vendor_device_id.vendor_id.strip()

vendor_device_id = vendor_device_list[0]
assert vendor_device_id.vendor_id.strip()
vendor_id = vendor_device_id.vendor_id.strip()
assert vendor_device_id.device_id.strip()
device_id = vendor_device_id.device_id.strip()

assert vendor_device_id.device_id.strip()
device_id = vendor_device_id.device_id.strip()
self.create_device_pool(
pool_type=config.type,
vendor_id=vendor_id,
device_id=device_id,
)
elif isinstance(devices, dict):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be PciAddressIdentifier instead of dict ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will be dict when it runbook loads it. PciAddressIdentifier is getting treated as dict.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If thats the case, why use isinstance(d, VendorDeviceIdIdentifier) in line 60 ?

bdf_list = devices.get("pci_bdf", [])
assert bdf_list, "Key not found: 'pci_bdf'"
pci_addr_list: List[str] = cast(List[str], bdf_list)

self.create_device_pool(
pool_type=config.type,
vendor_id=vendor_id,
device_id=device_id,
)
# Create pool from the list of PCI addresses
self.create_device_pool_from_pci_addresses(
pool_type=config.type,
pci_addr_list=pci_addr_list,
)
else:
raise LisaException(
f"Unknown device identifier of type: {type(devices)}"
f", value: {devices}"
)
15 changes: 12 additions & 3 deletions lisa/sut_orchestrator/util/schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import List
from typing import List, Union, cast

from dataclasses_json import dataclass_json

Expand All @@ -12,17 +12,26 @@ class HostDevicePoolType(Enum):

@dataclass_json()
@dataclass
class DeviceIdentifier:
class VendorDeviceIdIdentifier:
vendor_id: str = ""
device_id: str = ""


@dataclass_json()
@dataclass
class PciAddressIdentifier:
# list of bdf like 0000:3b:00.0 - <domain>:<bus>:<slot>.<fn>
pci_bdf: List[str] = field(default_factory=list)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Identifier should have one BDF. Same as how the VendorDeviceIdIdentifer has one set of (vendor_id, device_id).



# Configuration options for device-passthrough for the VM.
@dataclass_json()
@dataclass
class HostDevicePoolSchema:
type: HostDevicePoolType = HostDevicePoolType.PCI_NIC
devices: List[DeviceIdentifier] = field(default_factory=list)
devices: Union[List[VendorDeviceIdIdentifier], PciAddressIdentifier] = field(
default_factory=lambda: cast(List[VendorDeviceIdIdentifier], [])
)


@dataclass_json()
Expand Down
3 changes: 3 additions & 0 deletions lisa/tools/iperf3.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def run_as_server_async(
use_json_format: bool = False,
one_connection_only: bool = False,
daemon: bool = True,
interface_ip: str = "",
) -> Process:
# -s: run iperf3 as server mode
# -D: run iperf3 as a daemon
Expand All @@ -135,6 +136,8 @@ def run_as_server_async(
cmd += f" -f {report_unit} "
if port:
cmd += f" -p {port} "
if interface_ip:
cmd += f" -B {interface_ip}"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you explain what does -B exactly do ?
Something like server_ip is passed from the client while running the iperf tool. What does the new interface_ip additional do here ?

Copy link
Collaborator Author

@smit-gardhariya smit-gardhariya Mar 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@purna It will run the command and use dedicated interface passed with -B

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One server might have multiple interface and we want to run with dedicated passthrough NIC IP

process = self.node.execute_async(
f"{self.command} {cmd}", shell=True, sudo=True
)
Expand Down
18 changes: 15 additions & 3 deletions lisa/tools/netperf.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,17 @@ def can_install(self) -> bool:
def dependencies(self) -> List[Type[Tool]]:
return [Gcc, Git, Make, Texinfo]

def run_as_server(self, port: int = 30000, daemon: bool = True) -> None:
def run_as_server(
self,
port: int = 30000,
daemon: bool = True,
interface_ip: str = "",
) -> None:
cmd = f"netserver -p {port} "
if not daemon:
cmd += " -D "
if interface_ip:
cmd += f" -L {interface_ip}"
self.node.execute(
cmd,
sudo=True,
Expand All @@ -50,9 +57,11 @@ def run_as_client_async(
test_name: str = "TCP_RR",
seconds: int = 150,
time_unit: int = 1,
interface_ip: str = "",
send_recv_offset: str = "THROUGHPUT, THROUGHPUT_UNITS, MIN_LATENCY, MAX_LATENCY, MEAN_LATENCY, REQUEST_SIZE, RESPONSE_SIZE, STDDEV_LATENCY", # noqa: E501
) -> Process:
# -H: Specify the target machine and/or local ip and family
# -L: Specify the IP for client interface to be used
# -p: Specify netserver port number and/or local port
# -t: Specify test to perform
# -n: Set the number of processors for CPU util
Expand All @@ -61,8 +70,11 @@ def run_as_client_async(
# initial guess for units per second. A negative value for time will make
# heavy use of the system's timestamping functionality
# -O: Set the remote send,recv buffer offset
cmd = (
f"-H {server_ip} -p {port} -t {test_name} -n {core_count} -l {seconds}"
cmd: str = ""
if interface_ip:
cmd += f" -L {interface_ip}"
cmd += (
f" -H {server_ip} -p {port} -t {test_name} -n {core_count} -l {seconds}"
f" -D {time_unit} -- -O '{send_recv_offset}'"
)
process = self.node.execute_async(f"{self.command} {cmd}", sudo=True)
Expand Down
53 changes: 45 additions & 8 deletions microsoft/testsuites/performance/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ def perf_tcp_pps(
test_type: str,
server: Optional[RemoteNode] = None,
client: Optional[RemoteNode] = None,
use_internal_address: bool = False,
) -> None:
# Either server and client are set explicitly or we use the first two nodes
# from the environment. We never combine the two options. We need to specify
Expand All @@ -226,6 +227,14 @@ def perf_tcp_pps(
[lambda: client.tools[Netperf], lambda: server.tools[Netperf]] # type: ignore
)

server_interface_ip: str = ""
client_interface_ip: str = ""
if use_internal_address:
assert server.internal_address, "Server Node: internal address is not set"
assert client.internal_address, "Client Node: internal address is not set"
server_interface_ip = server.internal_address
client_interface_ip = client.internal_address

cpu = client.tools[Lscpu]
core_count = cpu.get_core_count()
if "maxpps" == test_type:
Expand All @@ -235,9 +244,14 @@ def perf_tcp_pps(
else:
ports = range(30000, 30001)
for port in ports:
server_netperf.run_as_server(port)
server_netperf.run_as_server(port, interface_ip=server_interface_ip)
for port in ports:
client_netperf.run_as_client_async(server.internal_address, core_count, port)
client_netperf.run_as_client_async(
server_ip=server.internal_address,
core_count=core_count,
port=port,
interface_ip=client_interface_ip,
)
client_sar = client.tools[Sar]
server_sar = server.tools[Sar]
server_sar.get_statistics_async()
Expand Down Expand Up @@ -446,17 +460,33 @@ def perf_iperf(
connections: List[int],
buffer_length_list: List[int],
udp_mode: bool = False,
server: Optional[RemoteNode] = None,
client: Optional[RemoteNode] = None,
run_with_internal_address: bool = False,
) -> None:
environment = test_result.environment
assert environment, "fail to get environment from testresult"
if server is not None or client is not None:
assert server is not None, "server need to be specified, if client is set"
assert client is not None, "client need to be specified, if server is set"
Comment on lines +467 to +469
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if server or client is sufficient. is not None is redundant.
Same for the assert statements.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pupacha Agree but it was causing issue with flak8 or mypy validation if i remember correctly. So i put it this way.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you post the error message here?

else:
environment = test_result.environment
assert environment, "fail to get environment from testresult"
# set server and client from environment, if not set explicitly
client = cast(RemoteNode, environment.nodes[0])
server = cast(RemoteNode, environment.nodes[1])

client = cast(RemoteNode, environment.nodes[0])
server = cast(RemoteNode, environment.nodes[1])
client_iperf3, server_iperf3 = run_in_parallel(
[lambda: client.tools[Iperf3], lambda: server.tools[Iperf3]]
[lambda: client.tools[Iperf3], lambda: server.tools[Iperf3]] # type: ignore
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is type: ignore needed now ?

)
test_case_name = inspect.stack()[1][3]
iperf3_messages_list: List[Any] = []
server_interface_ip = ""
client_interface_ip = ""
if run_with_internal_address:
server_interface_ip = server.internal_address
client_interface_ip = client.internal_address
assert server_interface_ip, "Server Node: internal address is not set"
assert client_interface_ip, "Client Node: internal address is not set"
Comment on lines +485 to +488
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comment as before - assertions should be before assignment.


if udp_mode:
for node in [client, server]:
ssh = node.tools[Ssh]
Expand All @@ -481,7 +511,13 @@ def perf_iperf(
current_server_iperf_instances += 1
server_iperf3_process_list.append(
server_iperf3.run_as_server_async(
current_server_port, "g", 10, True, True, False
port=current_server_port,
report_unit="g",
report_periodic=10,
use_json_format=True,
one_connection_only=True,
daemon=False,
interface_ip=server_interface_ip,
)
)
current_server_port += 1
Expand All @@ -502,6 +538,7 @@ def perf_iperf(
parallel_number=num_threads_p,
ip_version="4",
udp_mode=udp_mode,
client_ip=client_interface_ip,
)
)
current_client_port += 1
Expand Down
Loading
Loading