-
Notifications
You must be signed in to change notification settings - Fork 174
fix(DONT MERGE): duckdb>=1.4.1 typing & warnings
#3189
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 14 commits
3d93746
5d2600a
875ec84
49e96f1
d4df1c0
d110f24
2e91048
d83ffc7
6c65d72
e2d2013
98880d0
54f5030
9c3469c
863901c
98d23e9
9a9314a
3cf4ee4
ef2d4eb
9bb26cf
b4e4980
7b43748
64f6b4f
ed6f6b2
eb8e329
52e0955
e1fe121
fd9de79
f84344c
7246499
aa9e43a
4637e0a
5e2f7da
a58e4ce
c7f474b
cdecbc8
a98a3d9
1409514
fd5c275
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,31 +1,37 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from functools import lru_cache | ||
| from typing import TYPE_CHECKING | ||
| from typing import TYPE_CHECKING, Any | ||
|
|
||
| import duckdb | ||
| from duckdb import Expression | ||
|
|
||
| try: | ||
| import duckdb.sqltypes as duckdb_dtypes | ||
| except ModuleNotFoundError: | ||
| # DuckDB pre 1.3 | ||
| import duckdb.typing as duckdb_dtypes | ||
|
|
||
| from narwhals._utils import Version, isinstance_or_issubclass, zip_strict | ||
| from narwhals.exceptions import ColumnNotFoundError | ||
| import narwhals._duckdb.typing as nw_dd_t | ||
dangotbanned marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| from narwhals._utils import Implementation, Version, isinstance_or_issubclass, zip_strict | ||
| from narwhals.exceptions import ColumnNotFoundError, UnsupportedDTypeError | ||
|
|
||
| if TYPE_CHECKING: | ||
| from collections.abc import Mapping, Sequence | ||
|
|
||
| from duckdb import DuckDBPyRelation | ||
| from duckdb.sqltypes import DuckDBPyType | ||
| from typing_extensions import TypeAlias | ||
|
|
||
| from narwhals._compliant.typing import CompliantLazyFrameAny | ||
| from narwhals._duckdb.dataframe import DuckDBLazyFrame | ||
| from narwhals._duckdb.expr import DuckDBExpr | ||
| from narwhals.dtypes import DType | ||
| from narwhals.typing import IntoDType, TimeUnit | ||
| from narwhals.typing import IntoDType, NonNestedLiteral, TimeUnit | ||
|
|
||
| Incomplete: TypeAlias = Any | ||
|
|
||
| BACKEND_VERSION = Implementation.DUCKDB._backend_version() | ||
| """Static backend version for `duckdb`.""" | ||
|
|
||
| if TYPE_CHECKING or BACKEND_VERSION >= (1, 4): | ||
| from duckdb import sqltypes as duckdb_dtypes | ||
| else: # pragma: no cover | ||
| from duckdb import typing as duckdb_dtypes | ||
|
|
||
| UNITS_DICT = { | ||
| "y": "year", | ||
|
|
@@ -45,8 +51,13 @@ | |
| col = duckdb.ColumnExpression | ||
| """Alias for `duckdb.ColumnExpression`.""" | ||
|
|
||
| lit = duckdb.ConstantExpression | ||
| """Alias for `duckdb.ConstantExpression`.""" | ||
|
|
||
| # TODO @dangotbanned: Raise an issue upstream on `Expression | str` too narrow | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah I notice that as well! It's worse than having
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Welp I found where this ends up (most recent last)
Now I just need to work through
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @FBruzzesi!!!! I wasn't expecting it to support all of this, but here's the proof:
from __future__ import annotations
import datetime as dt # noqa: F811
import decimal
import uuid
from typing import Any, Callable, cast # noqa: F811
import duckdb # noqa: F811
import numpy as np # noqa: F811
import pandas as pd
lit = cast("Callable[[Any], duckdb.Expression]", duckdb.ConstantExpression)
def try_lit(*inputs: Any) -> None:
    """Print each input next to the DuckDB constant expression built from it.

    Each value is passed to `lit` (the `duckdb.ConstantExpression` alias cast
    just above to accept `Any`) and printed as `<value>: <expression>`, one
    line per input, so the rendered SQL literal can be inspected by eye.

    Args:
        *inputs: Arbitrary Python objects to try as DuckDB constants.
    """
    for value in inputs:
        print(f"{value}: {lit(value)}")
try_lit(
None,
pd.NaT,
pd.NA,
np.ma.masked,
True,
False,
0,
-999_991,
100_000_000_000_000,
1e98,
decimal.Decimal("2933.957546"),
uuid.uuid5(uuid.NAMESPACE_URL, "hello"),
dt.datetime(2034, 1, 23, 4, 2, 56),
dt.time(1, 2, 3, 4),
dt.date(1023, 3, 1),
dt.timedelta(50),
"i am a string",
bytearray(range(4)),
memoryview(b"i was a bytestring"),
b"i am a bytestring",
["list", "of", "str"],
("tuple", "of", "str"),
{"dict": "str"},
np.arange(10),
np.datetime64(dt.date(2000, 3, 1)),
duckdb.UnsignedIntegerValue(50),
)
None: NULL
NaT: NULL
<NA>: NULL
--: NULL
True: true
False: false
0: 0
-999991: -999991
100000000000000: 100000000000000
1e+98: 1e+98
2933.957546: 2933.957546
074171de-bc84-5ea4-b636-1135477620e1: '074171de-bc84-5ea4-b636-1135477620e1'::UUID
2034-01-23 04:02:56: '2034-01-23 04:02:56'::TIMESTAMP
01:02:03.000004: '01:02:03.000004'::TIME
1023-03-01: '1023-03-01'::DATE
50 days, 0:00:00: '50 days'::INTERVAL
i am a string: 'i am a string'
bytearray(b'\x00\x01\x02\x03'): '\x00\x01\x02\x03'::BLOB
<memory at 0x00000154FFEA3100>: 'i was a bytestring'::BLOB
b'i am a bytestring': 'i am a bytestring'::BLOB
['list', 'of', 'str']: ['list', 'of', 'str']
('tuple', 'of', 'str'): ['tuple', 'of', 'str']
{'dict': 'str'}: {'dict': 'str'}
[0 1 2 3 4 5 6 7 8 9]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
2000-03-01: '2000-03-01'::DATE
50: 50
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| def lit(value: Expression | NonNestedLiteral | Sequence[Any]) -> Expression: | ||
| """Alias for `duckdb.ConstantExpression`.""" | ||
| lit_: Incomplete = duckdb.ConstantExpression | ||
| return lit_(value) | ||
|
|
||
|
|
||
| when = duckdb.CaseExpression | ||
| """Alias for `duckdb.CaseExpression`.""" | ||
|
|
@@ -55,6 +66,8 @@ | |
| """Alias for `duckdb.FunctionExpression`.""" | ||
|
|
||
|
|
||
| # TODO @dangotbanned: Investigate `lhs: Expression | str | tuple[str]` | ||
| # Seems incorrect | ||
| def lambda_expr( | ||
| params: str | Expression | tuple[Expression, ...], expr: Expression, / | ||
| ) -> Expression: | ||
|
|
@@ -68,7 +81,8 @@ def lambda_expr( | |
| msg = f"DuckDB>=1.2.0 is required for this operation. Found: DuckDB {duckdb.__version__}" | ||
| raise NotImplementedError(msg) from exc | ||
| args = (params,) if isinstance(params, Expression) else params | ||
| return LambdaExpression(args, expr) | ||
| lambda_expr_: Incomplete = LambdaExpression | ||
| return lambda_expr_(args, expr) | ||
|
|
||
|
|
||
| def concat_str(*exprs: Expression, separator: str = "") -> Expression: | ||
|
|
@@ -135,20 +149,27 @@ def time_zone(self) -> str: | |
|
|
||
|
|
||
| def native_to_narwhals_dtype( | ||
| duckdb_dtype: duckdb_dtypes.DuckDBPyType, | ||
| duckdb_dtype: nw_dd_t.BaseType, version: Version, deferred_time_zone: DeferredTimeZone | ||
| ) -> DType: | ||
| if nw_dd_t.has_children(duckdb_dtype) and not nw_dd_t.is_dtype_decimal(duckdb_dtype): | ||
| return _nested_native_to_narwhals_dtype(duckdb_dtype, version, deferred_time_zone) | ||
| if nw_dd_t.is_dtype_timestamp_with_time_zone(duckdb_dtype): | ||
| return version.dtypes.Datetime(time_zone=deferred_time_zone.time_zone) | ||
| return _non_nested_native_to_narwhals_dtype(duckdb_dtype.id, version) | ||
|
|
||
|
|
||
| def _nested_native_to_narwhals_dtype( | ||
| duckdb_dtype: nw_dd_t._ParentType, | ||
| version: Version, | ||
| deferred_time_zone: DeferredTimeZone, | ||
| ) -> DType: | ||
| duckdb_dtype_id = duckdb_dtype.id | ||
| dtypes = version.dtypes | ||
|
|
||
| # Handle nested data types first | ||
| if duckdb_dtype_id == "list": | ||
| if nw_dd_t.is_dtype_list(duckdb_dtype): | ||
| return dtypes.List( | ||
| native_to_narwhals_dtype(duckdb_dtype.child, version, deferred_time_zone) | ||
| ) | ||
|
|
||
| if duckdb_dtype_id == "struct": | ||
| if nw_dd_t.is_dtype_struct(duckdb_dtype): | ||
| children = duckdb_dtype.children | ||
| return dtypes.Struct( | ||
| [ | ||
|
|
@@ -159,36 +180,31 @@ def native_to_narwhals_dtype( | |
| for child in children | ||
| ] | ||
| ) | ||
|
|
||
| if duckdb_dtype_id == "array": | ||
| if nw_dd_t.is_dtype_array(duckdb_dtype): | ||
| child, size = duckdb_dtype.children | ||
| shape: list[int] = [size[1]] | ||
|
|
||
| while child[1].id == "array": | ||
| while nw_dd_t.is_dtype_array(child[1]): | ||
| child, size = child[1].children | ||
| shape.insert(0, size[1]) | ||
|
|
||
| inner = native_to_narwhals_dtype(child[1], version, deferred_time_zone) | ||
| return dtypes.Array(inner=inner, shape=tuple(shape)) | ||
|
|
||
| if duckdb_dtype_id == "enum": | ||
| if nw_dd_t.is_dtype_enum(duckdb_dtype): | ||
| if version is Version.V1: | ||
| return dtypes.Enum() # type: ignore[call-arg] | ||
| categories = duckdb_dtype.children[0][1] | ||
| return dtypes.Enum(categories=categories) | ||
|
|
||
| if duckdb_dtype_id == "timestamp with time zone": | ||
| return dtypes.Datetime(time_zone=deferred_time_zone.time_zone) | ||
|
|
||
| return _non_nested_native_to_narwhals_dtype(duckdb_dtype_id, version) | ||
| # `MAP`, `UNION` | ||
| raise UnsupportedDTypeError(duckdb_dtype) | ||
dangotbanned marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
|
|
||
| def fetch_rel_time_zone(rel: duckdb.DuckDBPyRelation) -> str: | ||
| result = rel.query( | ||
| "duckdb_settings()", "select value from duckdb_settings() where name = 'TimeZone'" | ||
| ).fetchone() | ||
| assert result is not None # noqa: S101 | ||
| return result[0] # type: ignore[no-any-return] | ||
| return result[0] | ||
|
|
||
|
|
||
| @lru_cache(maxsize=16) | ||
|
|
@@ -222,7 +238,7 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version) | |
|
|
||
|
|
||
| dtypes = Version.MAIN.dtypes | ||
| NW_TO_DUCKDB_DTYPES: Mapping[type[DType], duckdb_dtypes.DuckDBPyType] = { | ||
| NW_TO_DUCKDB_DTYPES: Mapping[type[DType], DuckDBPyType] = { | ||
| dtypes.Float64: duckdb_dtypes.DOUBLE, | ||
| dtypes.Float32: duckdb_dtypes.FLOAT, | ||
| dtypes.Binary: duckdb_dtypes.BLOB, | ||
|
|
@@ -241,7 +257,7 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version) | |
| dtypes.UInt64: duckdb_dtypes.UBIGINT, | ||
| dtypes.UInt128: duckdb_dtypes.UHUGEINT, | ||
| } | ||
| TIME_UNIT_TO_TIMESTAMP: Mapping[TimeUnit, duckdb_dtypes.DuckDBPyType] = { | ||
| TIME_UNIT_TO_TIMESTAMP: Mapping[TimeUnit, DuckDBPyType] = { | ||
| "s": duckdb_dtypes.TIMESTAMP_S, | ||
| "ms": duckdb_dtypes.TIMESTAMP_MS, | ||
| "us": duckdb_dtypes.TIMESTAMP, | ||
|
|
@@ -252,7 +268,7 @@ def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version) | |
|
|
||
| def narwhals_to_native_dtype( # noqa: PLR0912, C901 | ||
| dtype: IntoDType, version: Version, deferred_time_zone: DeferredTimeZone | ||
| ) -> duckdb_dtypes.DuckDBPyType: | ||
| ) -> DuckDBPyType: | ||
| dtypes = version.dtypes | ||
| base_type = dtype.base_type() | ||
| if duckdb_type := NW_TO_DUCKDB_DTYPES.get(base_type): | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.