Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 63 additions & 21 deletions kombu/utils/text.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
"""Text Utilities."""

# flake8: noqa


from __future__ import annotations

import re
from difflib import SequenceMatcher
from typing import Iterable, Iterator

Expand All @@ -14,12 +16,15 @@ def escape_regex(p, white=''):
# type: (str, str) -> str
"""Escape string for use within a regular expression."""
# what's up with re.escape? that code must be neglected or something
return ''.join(c if c.isalnum() or c in white
else ('\\000' if c == '\000' else '\\' + c)
for c in p)
return ''.join(
c if c.isalnum() or c in white else ('\\000' if c == '\000' else '\\' + c)
for c in p
)


def fmatch_iter(needle: str, haystack: Iterable[str], min_ratio: float = 0.6) -> Iterator[tuple[float, str]]:
def fmatch_iter(
needle: str, haystack: Iterable[str], min_ratio: float = 0.6
) -> Iterator[tuple[float, str]]:
"""Fuzzy match: iteratively.

Yields
Expand All @@ -32,39 +37,76 @@ def fmatch_iter(needle: str, haystack: Iterable[str], min_ratio: float = 0.6) ->
yield ratio, key


def fmatch_best(needle: str, haystack: Iterable[str], min_ratio: float = 0.6) -> str | None:
def fmatch_best(
needle: str, haystack: Iterable[str], min_ratio: float = 0.6
) -> str | None:
"""Fuzzy match - Find best match (scalar)."""
try:
return sorted(
fmatch_iter(needle, haystack, min_ratio), reverse=True,
)[0][1]
fmatch_iter(needle, haystack, min_ratio),
reverse=True,
)[
0
][1]
Comment on lines 45 to +50
Copy link

Copilot AI Jul 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

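Using sorted() to find the best match sorts the entire iterable, which is inefficient when only the maximum is needed. Consider using max(fmatch_iter(...), key=lambda x: x[0], default=(None, None))[1] to avoid constructing a full list. Note that the key argument changes tie-breaking: sorted(..., reverse=True) compares whole (ratio, key) tuples, so equal ratios currently resolve to the greater key, whereas max() with key=lambda x: x[0] returns the first candidate encountered. Drop the key argument if the existing tie-breaking must be preserved.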

Copilot uses AI. Check for mistakes.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and this @couzhei

except IndexError:
return None


def version_string_as_tuple(s: str) -> version_info_t:
"""Convert version string to version info tuple."""
v = _unpack_version(*s.split('.'))
# X.Y.3a1 -> (X, Y, 3, 'a1')
if isinstance(v.micro, str):
v = version_info_t(v.major, v.minor, *_splitmicro(*v[2:]))
# X.Y.3a1-40 -> (X, Y, 3, 'a1', '40')
if not v.serial and v.releaselevel and '-' in v.releaselevel:
v = version_info_t(*list(v[0:3]) + v.releaselevel.split('-'))
return v
def version_string_as_tuple(version: str) -> version_info_t:
    """Parse a version string and return a version_info_t tuple.

    The accepted form is
    'major[.minor[.micro]][[.]releaselevel][.serial]'.  Only the major
    number is mandatory: a missing minor or micro number becomes 0 and a
    missing release level or serial becomes ''.

    The release level starts with a letter, '+' or '-' and runs up to the
    next dot; everything after that dot is the serial.  When no serial
    follows, a release level such as 'a1-40' is split at the hyphen into
    release level 'a1' and serial '40'.

    Args
    ----
        version (str): The version string to parse.

    Returns
    -------
        version_info_t: The parsed components in the order
            (major, minor, micro, releaselevel, serial).

    Raises
    ------
        ValueError: If the version string does not match the pattern.
    """
    pattern = (
        r'^(\d+)'  # major version (mandatory)
        r'(?:\.(\d+))?'  # optional minor version
        r'(?:\.(\d+))?'  # optional micro version
        # optional release level: starts with a letter, '+' or '-',
        # optionally preceded by a dot
        r'(?:\.?([a-zA-Z+-][\da-zA-Z+-]*))?'
        r'(?:\.(.*))?'  # optional serial: everything after the next dot
    )

    match = re.fullmatch(pattern, version)
    if match is None:
        raise ValueError(f"Invalid version string: {version}")

    major, minor, micro, releaselevel, serial = match.groups()
    # Groups that did not participate in the match are None.
    releaselevel = releaselevel or ''
    serial = serial or ''

    # X.Y.3a1-40 -> (X, Y, 3, 'a1', '40')
    # The release-level character class also accepts '-', so without this
    # step 'a1-40' would be kept whole and the serial left empty.  The
    # search starts at index 1 so that a leading hyphen ('-beta3') stays
    # part of the release level.
    if not serial:
        cut = releaselevel.find('-', 1)
        if cut != -1:
            releaselevel, serial = releaselevel[:cut], releaselevel[cut + 1:]

    return _unpack_version(
        int(major), int(minor or 0), int(micro or 0), releaselevel, serial,
    )


def _unpack_version(
major: str,
major: str | int = 0,
minor: str | int = 0,
micro: str | int = 0,
releaselevel: str = '',
serial: str = ''
serial: str = '',
) -> version_info_t:
return version_info_t(int(major), int(minor), micro, releaselevel, serial)
return version_info_t(int(major), int(minor), int(micro), releaselevel, serial)


def _splitmicro(micro: str, releaselevel: str = '', serial: str = '') -> tuple[int, str, str]:
def _splitmicro(
Copy link

Copilot AI Jul 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The _splitmicro helper is no longer referenced by version_string_as_tuple and appears unused. Consider removing it to eliminate dead code.

Copilot uses AI. Check for mistakes.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@couzhei what is your take on this?

micro: str, releaselevel: str = '', serial: str = ''
) -> tuple[int, str, str]:
for index, char in enumerate(micro):
if not char.isdigit():
break
Expand Down
24 changes: 17 additions & 7 deletions t/unit/utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,25 @@

def test_dir():
    """Check that dir() on the imported kombu package is non-empty."""
    import kombu as package

    listing = dir(package)
    assert listing


@pytest.mark.parametrize('version,expected', [
('3', version_info_t(3, 0, 0, '', '')),
('3.3', version_info_t(3, 3, 0, '', '')),
('3.3.1', version_info_t(3, 3, 1, '', '')),
('3.3.1a3', version_info_t(3, 3, 1, 'a3', '')),
('3.3.1.a3.40c32', version_info_t(3, 3, 1, 'a3', '40c32')),
])
@pytest.mark.parametrize(
'version,expected',
[
('3', version_info_t(3, 0, 0, '', '')),
('3.3', version_info_t(3, 3, 0, '', '')),
('3.3.1', version_info_t(3, 3, 1, '', '')),
('3.3.1a3', version_info_t(3, 3, 1, 'a3', '')),
('3.3.1.a3.40c32', version_info_t(3, 3, 1, 'a3', '40c32')),
('4.0.0+beta.3.47.g4f1a05b', version_info_t(
4, 0, 0, '+beta', '3.47.g4f1a05b')),
('4.0.0-beta3.47.g4f1a05b', version_info_t(
4, 0, 0, '-beta3', '47.g4f1a05b')),
('4.0.1-alpha.3+40c32', version_info_t(4, 0, 1, '-alpha', '3+40c32')),
('0+beta3.14159265', version_info_t(0, 0, 0, '+beta3', '14159265')),
Comment on lines +18 to +28
Copy link

Copilot AI Jul 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] Consider using pytest.param with the id parameter for complex version cases to make individual test failures easier to identify.

Copilot uses AI. Check for mistakes.
],
)
def test_version_string_as_tuple(version, expected):
assert version_string_as_tuple(version) == expected
Loading