20 #include <cudf/detail/iterator.cuh>
21 #include <cudf/detail/utilities/algorithm.cuh>
22 #include <cudf/detail/utilities/assert.cuh>
23 #include <cudf/hashing/detail/default_hash.cuh>
24 #include <cudf/hashing/detail/hashing.hpp>
25 #include <cudf/lists/detail/dremel.hpp>
26 #include <cudf/lists/list_device_view.cuh>
27 #include <cudf/lists/lists_column_device_view.cuh>
29 #include <cudf/structs/structs_column_device_view.cuh>
30 #include <cudf/table/row_operators.cuh>
36 #include <cuda/std/functional>
37 #include <cuda/std/limits>
38 #include <cuda/std/optional>
39 #include <cuda/std/tuple>
40 #include <cuda/std/utility>
41 #include <thrust/detail/use_default.h>
42 #include <thrust/equal.h>
43 #include <thrust/execution_policy.h>
44 #include <thrust/functional.h>
45 #include <thrust/iterator/counting_iterator.h>
46 #include <thrust/iterator/iterator_adaptor.h>
47 #include <thrust/iterator/iterator_categories.h>
48 #include <thrust/iterator/iterator_facade.h>
49 #include <thrust/iterator/transform_iterator.h>
50 #include <thrust/logical.h>
51 #include <thrust/swap.h>
52 #include <thrust/transform_reduce.h>
55 #include <type_traits>
57 namespace CUDF_EXPORT
cudf {
59 namespace experimental {
76 template <cudf::type_
id t>
79 using type = std::conditional_t<t == type_id::STRUCT or t == type_id::LIST, void, id_to_type<t>>;
100 template <
typename Index,
typename Underlying = std::underlying_type_t<Index>>
104 thrust::random_access_traversal_tag,
108 thrust::iterator_adaptor<strong_index_iterator<Index>, Index>;
117 friend class thrust::iterator_core_access;
120 __device__ constexpr
void increment() { ++begin; }
121 __device__ constexpr
void decrement() { --begin; }
123 __device__ constexpr
void advance(Underlying n) { begin += n; }
125 __device__ constexpr
bool equal(strong_index_iterator<Index>
const& other)
const noexcept
127 return begin == other.begin;
130 __device__ constexpr Index dereference() const noexcept {
return static_cast<Index
>(begin); }
132 __device__ constexpr Underlying distance_to(
133 strong_index_iterator<Index>
const& other)
const noexcept
135 return other.begin - begin;
144 using lhs_iterator = strong_index_iterator<lhs_index_type>;
149 using rhs_iterator = strong_index_iterator<rhs_index_type>;
151 namespace lexicographic {
168 template <
typename Element>
171 return detail::compare_elements(lhs, rhs);
188 template <
typename Element, CUDF_ENABLE_IF(not std::is_
floating_po
int_v<Element>)>
191 return detail::compare_elements(lhs, rhs);
201 template <
typename Element, CUDF_ENABLE_IF(std::is_
floating_po
int_v<Element>)>
205 return isnan(rhs) ? weak_ordering::EQUIVALENT : weak_ordering::GREATER;
206 }
else if (isnan(rhs)) {
207 return weak_ordering::LESS;
210 return detail::compare_elements(lhs, rhs);
214 using optional_dremel_view = cuda::std::optional<detail::dremel_device_view const>;
263 template <
bool has_nested_columns,
265 typename PhysicalElementComparator = sorting_physical_element_comparator>
299 PhysicalElementComparator comparator = {}) noexcept
302 _l_dremel(l_dremel_device_views),
303 _r_dremel(r_dremel_device_views),
304 _check_nulls{check_nulls},
306 _column_order{column_order},
307 _null_precedence{null_precedence},
308 _comparator{comparator}
330 template <
bool nested_disable = not has_nested_columns, CUDF_ENABLE_IF(nested_disable)>
337 PhysicalElementComparator comparator = {}) noexcept
342 _check_nulls{check_nulls},
344 _column_order{column_order},
345 _null_precedence{null_precedence},
346 _comparator{comparator}
374 null_order null_precedence = null_order::BEFORE,
376 PhysicalElementComparator comparator = {},
377 optional_dremel_view l_dremel_device_view = {},
378 optional_dremel_view r_dremel_device_view = {})
381 _check_nulls{check_nulls},
382 _null_precedence{null_precedence},
384 _l_dremel_device_view{l_dremel_device_view},
385 _r_dremel_device_view{r_dremel_device_view},
386 _comparator{comparator}
398 template <
typename Element,
399 CUDF_ENABLE_IF(cudf::is_relationally_comparable<Element, Element>())>
404 bool const lhs_is_null{_lhs.is_null(lhs_element_index)};
405 bool const rhs_is_null{_rhs.is_null(rhs_element_index)};
407 if (lhs_is_null or rhs_is_null) {
408 return cuda::std::pair(
null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth);
412 return cuda::std::pair(_comparator(_lhs.element<Element>(lhs_element_index),
413 _rhs.element<Element>(rhs_element_index)),
414 cuda::std::numeric_limits<int>::max());
424 template <
typename Element,
425 CUDF_ENABLE_IF(not cudf::is_relationally_comparable<Element, Element>() and
426 (not has_nested_columns or not cudf::is_nested<Element>()))>
430 CUDF_UNREACHABLE(
"Attempted to compare elements of uncomparable types.");
441 template <
typename Element,
442 CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::struct_view>)>
449 while (lcol.
type().
id() == type_id::STRUCT) {
450 bool const lhs_is_null{lcol.
is_null(lhs_element_index)};
451 bool const rhs_is_null{rcol.
is_null(rhs_element_index)};
453 if (lhs_is_null or rhs_is_null) {
455 return cuda::std::pair(state, depth);
459 return cuda::std::pair(weak_ordering::EQUIVALENT, cuda::std::numeric_limits<int>::max());
468 return cudf::type_dispatcher<dispatch_void_if_nested>(
483 template <
typename Element,
484 CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::list_view>)>
489 auto const is_l_row_null = _lhs.is_null(lhs_element_index);
490 auto const is_r_row_null = _rhs.is_null(rhs_element_index);
491 if (is_l_row_null || is_r_row_null) {
492 return cuda::std::pair(
null_compare(is_l_row_null, is_r_row_null, _null_precedence),
497 auto const l_max_def_level = _l_dremel_device_view->max_def_level;
498 auto const r_max_def_level = _r_dremel_device_view->max_def_level;
499 auto const l_def_levels = _l_dremel_device_view->def_levels;
500 auto const r_def_levels = _r_dremel_device_view->def_levels;
501 auto const l_rep_levels = _l_dremel_device_view->rep_levels;
502 auto const r_rep_levels = _r_dremel_device_view->rep_levels;
509 while (lcol.
type().
id() == type_id::LIST) {
517 auto const l_offsets = _l_dremel_device_view->offsets;
518 auto const r_offsets = _r_dremel_device_view->offsets;
519 auto l_start = l_offsets[lhs_element_index];
520 auto l_end = l_offsets[lhs_element_index + 1];
521 auto r_start = r_offsets[rhs_element_index];
522 auto r_end = r_offsets[rhs_element_index + 1];
532 for (
int l_dremel_index = l_start, r_dremel_index = r_start, element_index = 0;
533 l_dremel_index < l_end and r_dremel_index < r_end;
534 ++l_dremel_index, ++r_dremel_index) {
535 auto const l_rep_level = l_rep_levels[l_dremel_index];
536 auto const r_rep_level = r_rep_levels[r_dremel_index];
539 if (l_rep_level != r_rep_level) {
541 return l_rep_level < r_rep_level ? cuda::std::pair(weak_ordering::LESS, _depth)
542 : cuda::std::pair(weak_ordering::GREATER, _depth);
546 auto const l_def_level = l_def_levels[l_dremel_index];
547 auto const r_def_level = r_def_levels[r_dremel_index];
550 if (l_def_level < l_max_def_level || r_def_level < r_max_def_level) {
557 if ((lcol.
nullable() and l_def_levels[l_dremel_index] == l_max_def_level - 1) or
558 (rcol.
nullable() and r_def_levels[r_dremel_index] == r_max_def_level - 1)) {
561 if (l_def_level == r_def_level) {
continue; }
564 return l_def_level < r_def_level ? cuda::std::pair(weak_ordering::LESS, _depth)
565 : cuda::std::pair(weak_ordering::GREATER, _depth);
570 int last_null_depth = _depth;
571 cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher<dispatch_void_if_nested>(
572 lcol.
type(), comparator, element_index, element_index);
573 if (state != weak_ordering::EQUIVALENT) {
return cuda::std::pair(state, _depth); }
584 return cuda::std::pair(detail::compare_elements(l_end - l_start, r_end - r_start), _depth);
590 Nullate
const _check_nulls;
593 optional_dremel_view _l_dremel_device_view;
594 optional_dremel_view _r_dremel_device_view;
595 PhysicalElementComparator
const _comparator;
609 size_type const rhs_index)
const noexcept
611 int last_null_depth = cuda::std::numeric_limits<int>::max();
613 for (
size_type i = 0; i < _lhs.num_columns(); ++i) {
614 if (_lhs.column(i).type().id() == type_id::LIST) { ++list_column_index; }
616 int const depth = _depth.has_value() ? (*_depth)[i] : 0;
617 if (depth > last_null_depth) {
continue; }
619 bool const ascending =
620 _column_order.has_value() ? (*_column_order)[i] == order::ASCENDING :
true;
623 _null_precedence.has_value() ? (*_null_precedence)[i] : null_order::BEFORE;
628 auto const [l_dremel_i, r_dremel_i] =
629 _lhs.column(i).type().id() == type_id::LIST
630 ? cuda::std::make_tuple(optional_dremel_view(_l_dremel[list_column_index]),
631 optional_dremel_view(_r_dremel[list_column_index]))
632 : cuda::std::make_tuple(optional_dremel_view{}, optional_dremel_view{});
644 cuda::std::tie(state, last_null_depth) =
647 if (state == weak_ordering::EQUIVALENT) {
continue; }
651 : (state == weak_ordering::GREATER ? weak_ordering::LESS : weak_ordering::GREATER);
653 return weak_ordering::EQUIVALENT;
661 Nullate
const _check_nulls;
662 cuda::std::optional<device_span<int const>>
const _depth;
663 cuda::std::optional<device_span<order const>>
const _column_order;
664 cuda::std::optional<device_span<null_order const>>
const _null_precedence;
665 PhysicalElementComparator
const _comparator;
679 struct weak_ordering_comparator_impl {
680 static_assert(not((weak_ordering::EQUIVALENT == values) && ...),
681 "weak_ordering_comparator should not be used for pure equality comparisons. The "
682 "`row_equality_comparator` should be used instead");
684 template <
typename LhsType,
typename RhsType>
685 __device__ constexpr
bool operator()(LhsType
const lhs_index,
686 RhsType
const rhs_index)
const noexcept
688 weak_ordering const result = comparator(lhs_index, rhs_index);
689 return ((result == values) || ...);
691 Comparator
const comparator;
700 template <
typename Comparator>
701 struct less_comparator : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS> {
708 : weak_ordering_comparator_impl<Comparator,
weak_ordering::LESS>{comparator}
720 template <
typename Comparator>
722 : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS, weak_ordering::EQUIVALENT> {
792 static std::pair<std::shared_ptr<preprocessed_table>, std::shared_ptr<preprocessed_table>>
create(
822 static std::shared_ptr<preprocessed_table> create(
824 std::vector<int>&& verticalized_col_depths,
825 std::vector<std::unique_ptr<column>>&& transformed_columns,
828 bool has_ranked_children,
861 std::vector<detail::dremel_data>&& dremel_data,
863 std::vector<std::unique_ptr<column>>&& transformed_columns,
864 bool has_ranked_children);
870 std::vector<std::unique_ptr<column>>&& transformed_columns,
871 bool has_ranked_children);
886 [[nodiscard]] cuda::std::optional<device_span<order const>> column_order()
const
888 return _column_order.size() ? cuda::std::optional<device_span<order const>>(_column_order)
889 : cuda::std::nullopt;
899 [[nodiscard]] cuda::std::optional<device_span<null_order const>> null_precedence()
const
901 return _null_precedence.size()
902 ? cuda::std::optional<device_span<null_order const>>(_null_precedence)
903 : cuda::std::nullopt;
914 [[nodiscard]] cuda::std::optional<device_span<int const>> depths()
const
916 return _depths.size() ? cuda::std::optional<device_span<int const>>(_depths)
917 : cuda::std::nullopt;
922 if (_dremel_device_views.has_value()) {
929 template <
typename PhysicalElementComparator>
930 void check_physical_element_comparator()
932 if constexpr (!std::is_same_v<PhysicalElementComparator, sorting_physical_element_comparator>) {
934 "The input table has nested type children and they were transformed using a "
935 "different type of physical element comparator.");
940 table_device_view_owner
const _t;
946 cuda::std::optional<std::vector<detail::dremel_data>> _dremel_data;
947 cuda::std::optional<rmm::device_uvector<detail::dremel_device_view>> _dremel_device_views;
951 std::vector<std::unique_ptr<column>> _transformed_columns;
955 bool const _has_ranked_children;
991 : d_t{preprocessed_table::create(t, column_order, null_precedence, stream)}
1034 template <
bool has_nested_columns,
1037 auto less(Nullate
nullate = {}, PhysicalElementComparator comparator = {})
const
1039 d_t->check_physical_element_comparator<PhysicalElementComparator>();
1041 return less_comparator{
1042 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1046 d_t->dremel_device_views(),
1047 d_t->dremel_device_views(),
1049 d_t->column_order(),
1050 d_t->null_precedence(),
1055 template <
bool has_nested_columns,
1057 typename PhysicalElementComparator = sorting_physical_element_comparator>
1060 d_t->check_physical_element_comparator<PhysicalElementComparator>();
1062 return less_equivalent_comparator{
1063 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1067 d_t->dremel_device_views(),
1068 d_t->dremel_device_views(),
1070 d_t->column_order(),
1071 d_t->null_precedence(),
1076 std::shared_ptr<preprocessed_table> d_t;
1080 template <
typename Comparator>
1081 struct strong_index_comparator_adapter {
1082 strong_index_comparator_adapter(Comparator
const& comparator) : comparator{comparator} {}
1084 __device__ constexpr
weak_ordering operator()(lhs_index_type
const lhs_index,
1085 rhs_index_type
const rhs_index)
const noexcept
1091 __device__ constexpr
weak_ordering operator()(rhs_index_type
const rhs_index,
1092 lhs_index_type
const lhs_index)
const noexcept
1094 auto const left_right_ordering =
1098 if (left_right_ordering == weak_ordering::LESS) {
1099 return weak_ordering::GREATER;
1100 }
else if (left_right_ordering == weak_ordering::GREATER) {
1101 return weak_ordering::LESS;
1103 return weak_ordering::EQUIVALENT;
1106 Comparator
const comparator;
1165 std::shared_ptr<preprocessed_table> right)
1166 : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1206 template <
bool has_nested_columns,
1209 auto less(Nullate
nullate = {}, PhysicalElementComparator comparator = {})
const
1211 d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1212 d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1214 return less_comparator{strong_index_comparator_adapter{
1215 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1219 d_left_table->dremel_device_views(),
1220 d_right_table->dremel_device_views(),
1221 d_left_table->depths(),
1222 d_left_table->column_order(),
1223 d_left_table->null_precedence(),
1230 typename PhysicalElementComparator = sorting_physical_element_comparator>
1233 d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1234 d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1236 return less_equivalent_comparator{strong_index_comparator_adapter{
1237 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1241 d_left_table->dremel_device_views(),
1242 d_right_table->dremel_device_views(),
1243 d_left_table->depths(),
1244 d_left_table->column_order(),
1245 d_left_table->null_precedence(),
1250 std::shared_ptr<preprocessed_table> d_left_table;
1251 std::shared_ptr<preprocessed_table> d_right_table;
1260 namespace equality {
1277 template <
typename Element>
1278 __device__ constexpr
bool operator()(Element
const lhs, Element
const rhs)
const noexcept
1296 template <
typename Element, CUDF_ENABLE_IF(not std::is_
floating_po
int_v<Element>)>
1297 __device__ constexpr
bool operator()(Element
const lhs, Element
const rhs)
const noexcept
1311 template <
typename Element, CUDF_ENABLE_IF(std::is_
floating_po
int_v<Element>)>
1312 __device__ constexpr
bool operator()(Element
const lhs, Element
const rhs)
const noexcept
1314 return isnan(lhs) and isnan(rhs) ? true : lhs == rhs;
1339 template <
bool has_nested_columns,
1341 typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1356 size_type const rhs_index)
const noexcept
1361 element_comparator{check_nulls, l, r, nulls_are_equal, comparator},
1366 return thrust::equal(thrust::seq, lhs.
begin(), lhs.
end(), rhs.
begin(), equal_elements);
1384 PhysicalEqualityComparator comparator = {}) noexcept
1387 check_nulls{check_nulls},
1388 nulls_are_equal{nulls_are_equal},
1389 comparator{comparator}
1396 class element_comparator {
1410 __device__ element_comparator(Nullate check_nulls,
1411 column_device_view lhs,
1412 column_device_view rhs,
1414 PhysicalEqualityComparator comparator = {}) noexcept
1417 check_nulls{check_nulls},
1418 nulls_are_equal{nulls_are_equal},
1419 comparator{comparator}
1431 template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1432 __device__
bool operator()(
size_type const lhs_element_index,
1433 size_type const rhs_element_index)
const noexcept
1436 bool const lhs_is_null{lhs.is_null(lhs_element_index)};
1437 bool const rhs_is_null{rhs.is_null(rhs_element_index)};
1438 if (lhs_is_null and rhs_is_null) {
1439 return nulls_are_equal == null_equality::EQUAL;
1440 }
else if (lhs_is_null != rhs_is_null) {
1445 return comparator(lhs.element<Element>(lhs_element_index),
1446 rhs.element<Element>(rhs_element_index));
1449 template <
typename Element,
1450 CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>() and
1451 (not has_nested_columns or not cudf::is_nested<Element>())),
1453 __device__
bool operator()(Args...)
1455 CUDF_UNREACHABLE(
"Attempted to compare elements of uncomparable types.");
1458 template <typename Element, CUDF_ENABLE_IF(has_nested_columns and cudf::is_nested<Element>())>
1459 __device__
bool operator()(
size_type const lhs_element_index,
1460 size_type const rhs_element_index)
const noexcept
1462 column_device_view lcol = lhs.slice(lhs_element_index, 1);
1463 column_device_view rcol = rhs.slice(rhs_element_index, 1);
1464 while (lcol.type().id() == type_id::STRUCT || lcol.type().id() == type_id::LIST) {
1466 auto lvalid = detail::make_validity_iterator<true>(lcol);
1467 auto rvalid = detail::make_validity_iterator<true>(rcol);
1468 if (nulls_are_equal == null_equality::UNEQUAL) {
1470 thrust::seq, lvalid, lvalid + lcol.size(), cuda::std::logical_not<bool>()) or
1472 thrust::seq, rvalid, rvalid + rcol.size(), cuda::std::logical_not<bool>())) {
1476 if (not thrust::equal(thrust::seq, lvalid, lvalid + lcol.size(), rvalid)) {
1481 if (lcol.type().id() == type_id::STRUCT) {
1482 if (lcol.num_child_columns() == 0) {
return true; }
1484 lcol = detail::structs_column_device_view(lcol).get_sliced_child(0);
1485 rcol = detail::structs_column_device_view(rcol).get_sliced_child(0);
1486 }
else if (lcol.type().id() == type_id::LIST) {
1487 auto l_list_col = detail::lists_column_device_view(lcol);
1488 auto r_list_col = detail::lists_column_device_view(rcol);
1492 if (not thrust::equal(thrust::seq, lsizes, lsizes + lcol.size(), rsizes)) {
1496 lcol = l_list_col.get_sliced_child();
1497 rcol = r_list_col.get_sliced_child();
1498 if (lcol.size() != rcol.size()) {
return false; }
1502 auto comp = column_comparator{
1503 element_comparator{check_nulls, lcol, rcol, nulls_are_equal, comparator}, lcol.size()};
1504 return type_dispatcher<dispatch_void_if_nested>(lcol.type(), comp);
1515 struct column_comparator {
1516 element_comparator
const comp;
1524 template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1525 __device__
bool operator()()
const noexcept
1527 return thrust::all_of(thrust::seq,
1528 thrust::make_counting_iterator(0),
1529 thrust::make_counting_iterator(0) + size,
1530 [=](
auto i) {
return comp.template operator()<Element>(i, i); });
1533 template <
typename Element,
1534 CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>()),
1536 __device__
bool operator()(Args...) const noexcept
1538 CUDF_UNREACHABLE(
"Attempted to compare elements of uncomparable types.");
1542 column_device_view
const lhs;
1543 column_device_view
const rhs;
1544 Nullate
const check_nulls;
1546 PhysicalEqualityComparator
const comparator;
1549 table_device_view
const lhs;
1550 table_device_view
const rhs;
1551 Nullate
const check_nulls;
1553 PhysicalEqualityComparator
const comparator;
1581 using table_device_view_owner =
1585 std::vector<rmm::device_buffer>&& null_buffers,
1586 std::vector<std::unique_ptr<column>>&& tmp_columns)
1587 : _t(std::move(
table)),
1588 _null_buffers(std::move(null_buffers)),
1589 _tmp_columns(std::move(tmp_columns))
1600 table_device_view_owner _t;
1601 std::vector<rmm::device_buffer> _null_buffers;
1602 std::vector<std::unique_ptr<column>> _tmp_columns;
1660 template <
bool has_nested_columns,
1665 PhysicalEqualityComparator comparator = {})
const noexcept
1667 return device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>{
1668 nullate, *d_t, *d_t, nulls_are_equal, comparator};
1672 std::shared_ptr<preprocessed_table> d_t;
1676 template <
typename Comparator>
1677 struct strong_index_comparator_adapter {
1678 strong_index_comparator_adapter(Comparator
const& comparator) : comparator{comparator} {}
1680 __device__ constexpr
bool operator()(lhs_index_type
const lhs_index,
1681 rhs_index_type
const rhs_index)
const noexcept
1687 __device__ constexpr
bool operator()(rhs_index_type
const rhs_index,
1688 lhs_index_type
const lhs_index)
const noexcept
1690 return this->operator()(lhs_index, rhs_index);
1693 Comparator
const comparator;
1739 std::shared_ptr<preprocessed_table> right)
1740 : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1774 template <
bool has_nested_columns,
1779 PhysicalEqualityComparator comparator = {})
const noexcept
1781 return strong_index_comparator_adapter{
1782 device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>(
1783 nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)};
1787 std::shared_ptr<preprocessed_table> d_left_table;
1788 std::shared_ptr<preprocessed_table> d_right_table;
1801 template <
template <
typename>
class hash_function,
typename Nullate>
1813 uint32_t seed = DEFAULT_HASH_SEED,
1814 hash_value_type null_hash = cuda::std::numeric_limits<hash_value_type>::max()) noexcept
1815 : _check_nulls(nulls), _seed(seed), _null_hash(null_hash)
1827 template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
1831 if (_check_nulls && col.
is_null(row_index)) {
return _null_hash; }
1832 return hash_function<T>{_seed}(col.
element<T>(row_index));
1843 template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
1847 CUDF_UNREACHABLE(
"Unsupported type in hash.");
1861 template <
template <
typename>
class hash_function,
typename Nullate>
1874 auto it = thrust::make_transform_iterator(_table.begin(), [=](
auto const&
column) {
1875 return cudf::type_dispatcher<dispatch_storage_type>(
1877 element_hasher_adapter<hash_function>{_check_nulls, _seed},
1883 return detail::accumulate(it, it + _table.num_columns(), _seed, [](
auto hash,
auto h) {
1884 return cudf::hashing::detail::hash_combine(hash, h);
1896 template <
template <
typename>
class hash_fn>
1897 class element_hasher_adapter {
1898 static constexpr
hash_value_type NULL_HASH = cuda::std::numeric_limits<hash_value_type>::max();
1902 __device__ element_hasher_adapter(Nullate check_nulls, uint32_t seed) noexcept
1903 : _element_hasher(check_nulls, seed), _check_nulls(check_nulls)
1907 template <typename T, CUDF_ENABLE_IF(not cudf::is_nested<T>())>
1911 return _element_hasher.template operator()<T>(col, row_index);
1914 template <typename T, CUDF_ENABLE_IF(cudf::is_nested<T>())>
1919 column_device_view curr_col = col.
slice(row_index, 1);
1920 while (curr_col.type().id() == type_id::STRUCT || curr_col.type().id() == type_id::LIST) {
1922 auto validity_it = detail::make_validity_iterator<true>(curr_col);
1923 hash = detail::accumulate(
1924 validity_it, validity_it + curr_col.size(), hash, [](
auto hash,
auto is_valid) {
1925 return cudf::hashing::detail::hash_combine(hash,
1926 is_valid ? NON_NULL_HASH : NULL_HASH);
1929 if (curr_col.type().id() == type_id::STRUCT) {
1930 if (curr_col.num_child_columns() == 0) {
return hash; }
1932 curr_col = detail::structs_column_device_view(curr_col).get_sliced_child(0);
1933 }
else if (curr_col.type().id() == type_id::LIST) {
1934 auto list_col = detail::lists_column_device_view(curr_col);
1936 hash = detail::accumulate(
1937 list_sizes, list_sizes + list_col.size(), hash, [](
auto hash,
auto size) {
1938 return cudf::hashing::detail::hash_combine(hash, hash_fn<size_type>{}(size));
1940 curr_col = list_col.get_sliced_child();
1943 for (
int i = 0; i < curr_col.size(); ++i) {
1944 hash = cudf::hashing::detail::hash_combine(
1946 type_dispatcher<dispatch_void_if_nested>(curr_col.type(), _element_hasher, curr_col, i));
1951 element_hasher<hash_fn, Nullate>
const _element_hasher;
1952 Nullate
const _check_nulls;
1956 table_device_view t,
1957 uint32_t seed = DEFAULT_HASH_SEED) noexcept
1958 : _check_nulls{check_nulls}, _table{t}, _seed(seed)
1962 Nullate
const _check_nulls;
1963 table_device_view
const _table;
1964 uint32_t
const _seed;
1970 using preprocessed_table = row::equality::preprocessed_table;
1999 row_hasher(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
2013 template <
template <
typename>
class hash_function = cudf::hashing::detail::default_hash,
2014 template <
template <
typename>
class,
typename>
2018 uint32_t seed = DEFAULT_HASH_SEED)
const
2020 return DeviceRowHasher<hash_function, Nullate>(
nullate, *d_t, seed);
2024 std::shared_ptr<preprocessed_table> d_t;
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
T element(size_type element_index) const noexcept
Returns reference to element at the specified index.
CUDF_HOST_DEVICE column_device_view slice(size_type offset, size_type size) const noexcept
Get a new column_device_view which is a slice of this column.
CUDF_HOST_DEVICE size_type num_child_columns() const noexcept
Returns the number of child columns.
const_iterator< T > begin() const
Return an iterator to the first element of the column.
const_iterator< T > end() const
Returns an iterator to the element following the last element of the column.
A container of nullable device data as a column of elements.
constexpr CUDF_HOST_DEVICE type_id id() const noexcept
Returns the type identifier.
CUDF_HOST_DEVICE data_type type() const noexcept
Returns the element type.
bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
CUDF_HOST_DEVICE bool nullable() const noexcept
Indicates whether the column can contain null elements, i.e., if it has an allocated bitmask.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child() const
Fetches the child column of the underlying list column with offset and size applied.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child(size_type idx) const
Fetches the child column of the underlying struct column.
Computes the equality comparison between 2 rows.
constexpr bool operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table is equal to the row at rhs_index in the rhs tabl...
Comparator for performing equality comparisons between two rows of the same table.
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing equality comparisons between two rows of the same table.
self_comparator(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows of the same table.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Get the comparison operator to use on the device.
An owning object that can be used to equality compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing equality comparisons between two rows from two tables.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows from two tables.
Computes the hash value of a row in the given table.
auto operator()(size_type row_index) const noexcept
Return the hash value of a row in the given table.
Computes the hash value of an element in the given column.
uint32_t _seed
The seed to use for hashing.
element_hasher(Nullate nulls, uint32_t seed=DEFAULT_HASH_SEED, hash_value_type null_hash=cuda::std::numeric_limits< hash_value_type >::max()) noexcept
Constructs an element_hasher object.
Nullate _check_nulls
Whether to check for nulls.
hash_value_type _null_hash
Hash value to use for null elements.
Computes the hash value of a row in the given table.
DeviceRowHasher< hash_function, Nullate > device_hasher(Nullate nullate={}, uint32_t seed=DEFAULT_HASH_SEED) const
Get the hash operator to use on the device.
row_hasher(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for hashing the rows of a table.
row_hasher(std::shared_ptr< preprocessed_table > t)
Construct an owning object for hashing the rows of a table from an existing preprocessed_table.
Performs a relational comparison between two elements in two columns.
cuda::std::pair< weak_ordering, int > operator()(size_type lhs_element_index, size_type rhs_element_index)
Compares two list-type columns.
cuda::std::pair< weak_ordering, int > operator()(size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
Performs a relational comparison between the specified elements.
cuda::std::pair< weak_ordering, int > operator()(size_type const, size_type const) const noexcept
Throws run-time error when columns types cannot be compared or if this class is instantiated with has...
element_comparator(Nullate check_nulls, column_device_view lhs, column_device_view rhs, null_order null_precedence=null_order::BEFORE, int depth=0, PhysicalElementComparator comparator={}, optional_dremel_view l_dremel_device_view={}, optional_dremel_view r_dremel_device_view={})
Construct type-dispatched function object for performing a relational comparison between two elements...
Computes the lexicographic comparison between 2 rows.
constexpr weak_ordering operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table compares lexicographically less,...
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, cuda::std::optional< device_span< order const >> column_order=cuda::std::nullopt, cuda::std::optional< device_span< null_order const >> null_precedence=cuda::std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables....
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, device_span< detail::dremel_device_view const > l_dremel_device_views, device_span< detail::dremel_device_view const > r_dremel_device_views, cuda::std::optional< device_span< int const >> depth=cuda::std::nullopt, cuda::std::optional< device_span< order const >> column_order=cuda::std::nullopt, cuda::std::optional< device_span< null_order const >> null_precedence=cuda::std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables.
An owning object that can be used to lexicographically compare two rows of the same table.
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
self_comparator(table_view const &t, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between two rows of the same tab...
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
An owning object that can be used to lexicographically compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between rows of two different ta...
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
Table device view that is usable in device memory.
A set of cudf::column_view's of the same size.
A set of cudf::column's of the same size.
Column device view class definitions.
uint32_t hash_value_type
Type of hash value.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
CUDF_HOST_DEVICE constexpr decltype(auto) __forceinline__ type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
null_order
Indicates how null values compare against all other values.
null_equality
Enum to consider two nulls as equal or unequal.
int32_t size_type
Row index type for columns and tables.
#define CUDF_ENABLE_IF(...)
Convenience macro for SFINAE as an unnamed template parameter.
weak_ordering
Result type of the element_relational_comparator function object.
@ EQUIVALENT
Indicates a is ordered neither before nor after b
auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
Compare the nulls according to null order.
CUDF_HOST_DEVICE auto make_list_size_iterator(detail::lists_column_device_view const &c)
Makes an iterator that returns size of the list by row index.
Column APIs for sort and rank.
Device version of C++20 std::span with reduced feature set.
A map from cudf::type_id to cudf type that excludes LIST and STRUCT types.
std::conditional_t< t==type_id::STRUCT or t==type_id::LIST, void, id_to_type< t > > type
The type to dispatch to if the type is nested.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparison of non-floating point values.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparisons.
Preprocessed table for use with row equality comparison or row hashing.
static std::shared_ptr< preprocessed_table > create(table_view const &table, rmm::cuda_stream_view stream)
Factory to construct preprocessed_table for use with row equality comparison or row hashing.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS m...
less_comparator(Comparator const &comparator)
Constructs a less_comparator.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS o...
less_equivalent_comparator(Comparator const &comparator)
Constructs a less_equivalent_comparator.
Computes a weak ordering of two values with special sorting behavior.
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparisons.
Preprocessed table for use with lexicographical comparison.
std::invoke_result_t< decltype(table_device_view::create), table_view, rmm::cuda_stream_view > table_device_view_owner
Type of table device view owner for the preprocessed table.
static std::pair< std::shared_ptr< preprocessed_table >, std::shared_ptr< preprocessed_table > > create(table_view const &lhs, table_view const &rhs, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess tables for use with lexicographical comparison.
static std::shared_ptr< preprocessed_table > create(table_view const &table, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess table for use with lexicographical comparison.
Relational comparator functor that compares physical values rather than logical elements like lists,...
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparison of non-floating point values.
A counting iterator that uses strongly typed indices bound to tables.
thrust::iterator_adaptor< strong_index_iterator< Index >, Index > super_t
The base class.
constexpr strong_index_iterator(Underlying n)
Constructs a strong index iterator.
C++20 std::span with reduced feature set.
Indicates the presence of nulls at compile-time or runtime.
Table device view class definitions.
bool has_nested_columns(table_view const &table)
Determine if any nested columns exist in a given table.
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.