From 7e5be8eb1ba0b308c4c0646655b01164144e1143 Mon Sep 17 00:00:00 2001 From: Ronan Dunklau Date: Tue, 12 Jan 2016 21:01:05 +0100 Subject: [PATCH] Take various databases support for NULLS ordering Before that, NULLS FIRST, NULLS LAST was systematically appended to every order by clause. Now, The sqlalchemy FDW tries to avoid generating such a clause if it is not necessary. In addition to that, if the remoted database does not support the clause, AND its default behavior differs from the one the query is expected to perform, the sort is not pushed down at all. Thanks to Kyle Lilly for the report ! Fixes #126 --- python/multicorn/sqlalchemyfdw.py | 75 ++++++++++++++++++-- test-2.7/expected/multicorn_alchemy_test.out | 54 ++++++++++++++ test-2.7/sql/multicorn_alchemy_test.sql | 12 ++++ test-3.3/expected/multicorn_alchemy_test.out | 54 ++++++++++++++ 4 files changed, 191 insertions(+), 4 deletions(-) diff --git a/python/multicorn/sqlalchemyfdw.py b/python/multicorn/sqlalchemyfdw.py index 6c6701c98..48bd6d26e 100644 --- a/python/multicorn/sqlalchemyfdw.py +++ b/python/multicorn/sqlalchemyfdw.py @@ -94,6 +94,40 @@ - the set of needed columns is pushed to the remote_side, and only those columns will be fetched. +Sort push-down support +---------------------- + +Since the rules about NULL ordering are different for every database vendor, and +many of them don't support the NULLS FIRST, NULLS LAST clause, this FDW tries +to not generate any NULLS FIRST / LAST clause if the requested order matches +what the remote system would do by default. + +Additionnaly, if it is found that a query can't be executed while keeping the +same NULL ordering (because the remote system doesn't support the NULL ordering +clause), the sort will not be pushed down. + +To check the SQL query that will be sent to the remote system, use EXPLAIN: + +.. code-block:: sql + + postgres=# explain select * from testalchemy order by id DESC NULLS FIRST; + QUERY PLAN + ------------------------------------------------------------------------------------------------------------------ + Foreign Scan on testalchemy (cost=20.00..50000000000.00 rows=100000000 width=500) + Multicorn: SELECT basetable.atimestamp, basetable.anumeric, basetable.adate, basetable.avarchar, basetable.id + FROM basetable ORDER BY basetable.id DESC + (3 lignes) + + Temps : 167,856 ms + postgres=# explain select * from testalchemy order by id DESC NULLS LAST; + QUERY PLAN + ------------------------------------------------------------------------------------------------------------------ + Foreign Scan on testalchemy (cost=20.00..50000000000.00 rows=100000000 width=500) + Multicorn: SELECT basetable.atimestamp, basetable.anumeric, basetable.adate, basetable.avarchar, basetable.id + FROM basetable ORDER BY basetable.id DESC NULLS LAST + (3 lignes) + + Usage example ------------- @@ -121,6 +155,8 @@ from sqlalchemy import create_engine from sqlalchemy.engine.url import make_url, URL from sqlalchemy.sql import select, operators as sqlops, and_ +from sqlalchemy.sql.expression import nullsfirst, nullslast + # Handle the sqlalchemy 0.8 / 0.9 changes try: from sqlalchemy.sql import sqltypes @@ -184,6 +220,14 @@ def _parse_url_from_options(fdw_options): oracle_dialect.NUMBER: NUMERIC } +SORT_SUPPORT = { + 'mssql': {'default': 'lower', 'support': False}, + 'postgresql': {'default': 'higher', 'support': True}, + 'mysql': {'default': 'lower', 'support': False}, + 'oracle': {'default': 'higher', 'support': True}, + 'sqlite': {'default': 'lower', 'support': False} +} + class SqlAlchemyFdw(ForeignDataWrapper): """An SqlAlchemy foreign data wrapper. @@ -220,7 +264,31 @@ def __init__(self, fdw_options, fdw_columns): + def _need_explicit_null_ordering(self, key): + support = SORT_SUPPORT[self.engine.dialect.name] + default = support['default'] + no = None + if key.is_reversed: + no = nullsfirst if default == 'higher' else nullslast + else: + no = nullslast if default == 'higher' else nullsfirst + if key.nulls_first: + if no != nullsfirst: + return nullsfirst + return None + else: + if no != nullslast: + return nullslast + return None + def can_sort(self, sortkeys): + if SORT_SUPPORT.get(self.engine.dialect.name) is None: + # We have no idea about defaults + return [] + can_order_null = SORT_SUPPORT[self.engine.dialect.name]['support'] + if (any((self._need_explicit_null_ordering(x) is not None + for x in sortkeys)) and not can_order_null): + return [] return sortkeys def explain(self, quals, columns, sortkeys=None, verbose=False): @@ -253,10 +321,9 @@ def _build_statement(self, quals, columns, sortkeys): column = column.desc() if sortkey.collate: column = column.collate('"%s"' % sortkey.collate) - if sortkey.nulls_first: - column = column.nullsfirst() - else: - column = column.nullslast() + null_ordering = self._need_explicit_null_ordering(sortkey) + if null_ordering: + column = null_ordering(column) statement = statement.order_by(column) return statement diff --git a/test-2.7/expected/multicorn_alchemy_test.out b/test-2.7/expected/multicorn_alchemy_test.out index 96ebca6ba..bf1970438 100644 --- a/test-2.7/expected/multicorn_alchemy_test.out +++ b/test-2.7/expected/multicorn_alchemy_test.out @@ -136,6 +136,60 @@ select * from testalchemy where id not in (1, 2); 4 | 11-02-1922 | Tue Jan 02 23:12:54 1962 | -3000 | (2 rows) +select * from testalchemy order by avarchar; + id | adate | atimestamp | anumeric | avarchar +----+------------+---------------------------------+----------+-------------- + 3 | 01-02-1972 | Sun Jan 02 16:12:54 1972 | 4000 | another Test + 2 | 03-05-1990 | Mon Mar 02 10:40:18.321023 1998 | 12.2 | Another Test + 1 | 01-01-1980 | Tue Jan 01 11:01:21.132912 1980 | 3.4 | Test + 4 | 11-02-1922 | Tue Jan 02 23:12:54 1962 | -3000 | +(4 rows) + +select * from testalchemy order by avarchar desc; + id | adate | atimestamp | anumeric | avarchar +----+------------+---------------------------------+----------+-------------- + 4 | 11-02-1922 | Tue Jan 02 23:12:54 1962 | -3000 | + 1 | 01-01-1980 | Tue Jan 01 11:01:21.132912 1980 | 3.4 | Test + 2 | 03-05-1990 | Mon Mar 02 10:40:18.321023 1998 | 12.2 | Another Test + 3 | 01-02-1972 | Sun Jan 02 16:12:54 1972 | 4000 | another Test +(4 rows) + +select * from testalchemy order by avarchar desc nulls first; + id | adate | atimestamp | anumeric | avarchar +----+------------+---------------------------------+----------+-------------- + 4 | 11-02-1922 | Tue Jan 02 23:12:54 1962 | -3000 | + 1 | 01-01-1980 | Tue Jan 01 11:01:21.132912 1980 | 3.4 | Test + 2 | 03-05-1990 | Mon Mar 02 10:40:18.321023 1998 | 12.2 | Another Test + 3 | 01-02-1972 | Sun Jan 02 16:12:54 1972 | 4000 | another Test +(4 rows) + +select * from testalchemy order by avarchar desc nulls last; + id | adate | atimestamp | anumeric | avarchar +----+------------+---------------------------------+----------+-------------- + 1 | 01-01-1980 | Tue Jan 01 11:01:21.132912 1980 | 3.4 | Test + 2 | 03-05-1990 | Mon Mar 02 10:40:18.321023 1998 | 12.2 | Another Test + 3 | 01-02-1972 | Sun Jan 02 16:12:54 1972 | 4000 | another Test + 4 | 11-02-1922 | Tue Jan 02 23:12:54 1962 | -3000 | +(4 rows) + +select * from testalchemy order by avarchar nulls first; + id | adate | atimestamp | anumeric | avarchar +----+------------+---------------------------------+----------+-------------- + 4 | 11-02-1922 | Tue Jan 02 23:12:54 1962 | -3000 | + 3 | 01-02-1972 | Sun Jan 02 16:12:54 1972 | 4000 | another Test + 2 | 03-05-1990 | Mon Mar 02 10:40:18.321023 1998 | 12.2 | Another Test + 1 | 01-01-1980 | Tue Jan 01 11:01:21.132912 1980 | 3.4 | Test +(4 rows) + +select * from testalchemy order by avarchar nulls last; + id | adate | atimestamp | anumeric | avarchar +----+------------+---------------------------------+----------+-------------- + 3 | 01-02-1972 | Sun Jan 02 16:12:54 1972 | 4000 | another Test + 2 | 03-05-1990 | Mon Mar 02 10:40:18.321023 1998 | 12.2 | Another Test + 1 | 01-01-1980 | Tue Jan 01 11:01:21.132912 1980 | 3.4 | Test + 4 | 11-02-1922 | Tue Jan 02 23:12:54 1962 | -3000 | +(4 rows) + select count(*) from testalchemy; count ------- diff --git a/test-2.7/sql/multicorn_alchemy_test.sql b/test-2.7/sql/multicorn_alchemy_test.sql index 5db545c7d..81062b863 100644 --- a/test-2.7/sql/multicorn_alchemy_test.sql +++ b/test-2.7/sql/multicorn_alchemy_test.sql @@ -66,6 +66,18 @@ select * from testalchemy where id in (1,2); select * from testalchemy where id not in (1, 2); +select * from testalchemy order by avarchar; + +select * from testalchemy order by avarchar desc; + +select * from testalchemy order by avarchar desc nulls first; + +select * from testalchemy order by avarchar desc nulls last; + +select * from testalchemy order by avarchar nulls first; + +select * from testalchemy order by avarchar nulls last; + select count(*) from testalchemy; DROP EXTENSION multicorn cascade; diff --git a/test-3.3/expected/multicorn_alchemy_test.out b/test-3.3/expected/multicorn_alchemy_test.out index 96ebca6ba..bf1970438 100644 --- a/test-3.3/expected/multicorn_alchemy_test.out +++ b/test-3.3/expected/multicorn_alchemy_test.out @@ -136,6 +136,60 @@ select * from testalchemy where id not in (1, 2); 4 | 11-02-1922 | Tue Jan 02 23:12:54 1962 | -3000 | (2 rows) +select * from testalchemy order by avarchar; + id | adate | atimestamp | anumeric | avarchar +----+------------+---------------------------------+----------+-------------- + 3 | 01-02-1972 | Sun Jan 02 16:12:54 1972 | 4000 | another Test + 2 | 03-05-1990 | Mon Mar 02 10:40:18.321023 1998 | 12.2 | Another Test + 1 | 01-01-1980 | Tue Jan 01 11:01:21.132912 1980 | 3.4 | Test + 4 | 11-02-1922 | Tue Jan 02 23:12:54 1962 | -3000 | +(4 rows) + +select * from testalchemy order by avarchar desc; + id | adate | atimestamp | anumeric | avarchar +----+------------+---------------------------------+----------+-------------- + 4 | 11-02-1922 | Tue Jan 02 23:12:54 1962 | -3000 | + 1 | 01-01-1980 | Tue Jan 01 11:01:21.132912 1980 | 3.4 | Test + 2 | 03-05-1990 | Mon Mar 02 10:40:18.321023 1998 | 12.2 | Another Test + 3 | 01-02-1972 | Sun Jan 02 16:12:54 1972 | 4000 | another Test +(4 rows) + +select * from testalchemy order by avarchar desc nulls first; + id | adate | atimestamp | anumeric | avarchar +----+------------+---------------------------------+----------+-------------- + 4 | 11-02-1922 | Tue Jan 02 23:12:54 1962 | -3000 | + 1 | 01-01-1980 | Tue Jan 01 11:01:21.132912 1980 | 3.4 | Test + 2 | 03-05-1990 | Mon Mar 02 10:40:18.321023 1998 | 12.2 | Another Test + 3 | 01-02-1972 | Sun Jan 02 16:12:54 1972 | 4000 | another Test +(4 rows) + +select * from testalchemy order by avarchar desc nulls last; + id | adate | atimestamp | anumeric | avarchar +----+------------+---------------------------------+----------+-------------- + 1 | 01-01-1980 | Tue Jan 01 11:01:21.132912 1980 | 3.4 | Test + 2 | 03-05-1990 | Mon Mar 02 10:40:18.321023 1998 | 12.2 | Another Test + 3 | 01-02-1972 | Sun Jan 02 16:12:54 1972 | 4000 | another Test + 4 | 11-02-1922 | Tue Jan 02 23:12:54 1962 | -3000 | +(4 rows) + +select * from testalchemy order by avarchar nulls first; + id | adate | atimestamp | anumeric | avarchar +----+------------+---------------------------------+----------+-------------- + 4 | 11-02-1922 | Tue Jan 02 23:12:54 1962 | -3000 | + 3 | 01-02-1972 | Sun Jan 02 16:12:54 1972 | 4000 | another Test + 2 | 03-05-1990 | Mon Mar 02 10:40:18.321023 1998 | 12.2 | Another Test + 1 | 01-01-1980 | Tue Jan 01 11:01:21.132912 1980 | 3.4 | Test +(4 rows) + +select * from testalchemy order by avarchar nulls last; + id | adate | atimestamp | anumeric | avarchar +----+------------+---------------------------------+----------+-------------- + 3 | 01-02-1972 | Sun Jan 02 16:12:54 1972 | 4000 | another Test + 2 | 03-05-1990 | Mon Mar 02 10:40:18.321023 1998 | 12.2 | Another Test + 1 | 01-01-1980 | Tue Jan 01 11:01:21.132912 1980 | 3.4 | Test + 4 | 11-02-1922 | Tue Jan 02 23:12:54 1962 | -3000 | +(4 rows) + select count(*) from testalchemy; count -------