All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
hash.h
Go to the documentation of this file.
1//
2// Copyright 2016 Pixar
3//
4// Licensed under the terms set forth in the LICENSE.txt file available at
5// https://openusd.org/license.
6//
7#ifndef PXR_BASE_TF_HASH_H
8#define PXR_BASE_TF_HASH_H
9
12
13#include "pxr/pxr.h"
14#include "pxr/base/tf/tf.h"
15#include "pxr/base/tf/api.h"
16
17#include <cstring>
18#include <string>
19#include <map>
20#include <memory>
21#include <set>
22#include <typeindex>
23#include <type_traits>
24#include <utility>
25#include <vector>
26
27PXR_NAMESPACE_OPEN_SCOPE
28
29// Support integers.
30template <class HashState, class T>
31std::enable_if_t<std::is_integral<T>::value>
32TfHashAppend(HashState &h, T integral)
33{
34 h.Append(integral);
35}
36
37// Simple metafunction that returns an unsigned integral type given a size in
38// bytes.
39template <size_t Size> struct Tf_SizedUnsignedInt;
40template <> struct Tf_SizedUnsignedInt<1> { using type = uint8_t; };
41template <> struct Tf_SizedUnsignedInt<2> { using type = uint16_t; };
42template <> struct Tf_SizedUnsignedInt<4> { using type = uint32_t; };
43template <> struct Tf_SizedUnsignedInt<8> { using type = uint64_t; };
44
45// Support enums.
46template <class HashState, class Enum>
47std::enable_if_t<std::is_enum<Enum>::value>
48TfHashAppend(HashState &h, Enum e)
49{
50 h.Append(static_cast<std::underlying_type_t<Enum>>(e));
51}
52
53// Support floating point.
54template <class HashState, class T>
55std::enable_if_t<std::is_floating_point<T>::value>
56TfHashAppend(HashState &h, T fp)
57{
58 // We want both positive and negative zero to hash the same, so we have to
59 // check against zero here.
60 typename Tf_SizedUnsignedInt<sizeof(T)>::type intbuf = 0;
61 if (fp != static_cast<T>(0)) {
62 memcpy(&intbuf, &fp, sizeof(T));
63 }
64 h.Append(intbuf);
65}
66
67// Support std::pair.
68template <class HashState, class T, class U>
69inline void
70TfHashAppend(HashState &h, std::pair<T, U> const &p)
71{
72 h.Append(p.first);
73 h.Append(p.second);
74}
75
76// Support std::vector. std::vector<bool> specialized below.
77template <class HashState, class T>
78inline void
79TfHashAppend(HashState &h, std::vector<T> const &vec)
80{
81 static_assert(!std::is_same_v<std::remove_cv_t<T>, bool>,
82 "Unexpected usage of vector of 'bool'."
83 "Expected explicit overload.");
84 h.AppendContiguous(vec.data(), vec.size());
85}
86
87// Support std::vector<bool>.
88template <class HashState>
89inline void
90TfHashAppend(HashState &h, std::vector<bool> const &vec)
91{
92 h.Append(std::hash<std::vector<bool>>{}(vec));
93}
94
95// Support std::set.
96// NOTE: Supporting std::unordered_set is complicated because the traversal
97// order is not guaranteed
98template <class HashState, class T, class Compare>
99inline void
100TfHashAppend(HashState& h, std::set<T, Compare> const &elements)
101{
102 h.AppendRange(std::begin(elements), std::end(elements));
103}
104
105// Support std::map.
106// NOTE: Supporting std::unordered_map is complicated because the traversal
107// order is not guaranteed
108template <class HashState, class Key, class Value, class Compare>
109inline void
110TfHashAppend(HashState& h, std::map<Key, Value, Compare> const &elements)
111{
112 h.AppendRange(std::begin(elements), std::end(elements));
113}
114
115// Support for hashing std::string.
116template <class HashState>
117inline void
118TfHashAppend(HashState &h, const std::string& s)
119{
120 return h.AppendContiguous(s.c_str(), s.length());
121}
122
123// Support for hashing pointers, but we explicitly delete the version for
124// [const] char pointers. See more below.
125template <class HashState, class T>
126inline void
127TfHashAppend(HashState &h, const T* ptr) {
128 return h.Append(reinterpret_cast<uintptr_t>(ptr));
129}
130
131// We refuse to hash [const] char *. You're almost certainly trying to hash the
132// pointed-to string and this will not do that (it will hash the pointer
133// itself). To hash a c-style null terminated string, you can use
134// TfHashAsCStr() to indicate the intent, or use TfHashCString. If you
135// really want to hash the pointer then use static_cast<const void*>(ptr) or
136// TfHashCharPtr.
137template <class HashState>
138inline void TfHashAppend(HashState &h, char const *ptr) = delete;
139template <class HashState>
140inline void TfHashAppend(HashState &h, char *ptr) = delete;
141
145{
146 explicit TfCStrHashWrapper(char const *cstr) : cstr(cstr) {}
147 char const *cstr;
148};
149
158TfHashAsCStr(char const *cstr)
159{
160 return TfCStrHashWrapper(cstr);
161}
162
163template <class HashState>
164inline void TfHashAppend(HashState &h, TfCStrHashWrapper hcstr)
165{
166 return h.AppendContiguous(hcstr.cstr, std::strlen(hcstr.cstr));
167}
168
169// Implementation detail: dispatch based on hash capability: Try TfHashAppend
170// first, otherwise try std::hash, followed by hash_value. We rely on a
171// combination of expression SFINAE and establishing preferred order by passing
172// a 0 constant and having the overloads take int (highest priority), long
173// (next priority) and '...' (lowest priority).
174
175// std::hash version, attempted second.
176template <class HashState, class T>
177inline auto Tf_HashImpl(HashState &h, T &&obj, long)
178 -> decltype(std::hash<typename std::decay<T>::type>()(
179 std::forward<T>(obj)), void())
180{
181 TfHashAppend(
182 h, std::hash<typename std::decay<T>::type>()(std::forward<T>(obj)));
183}
184
185// hash_value, attempted last.
186template <class HashState, class T>
187inline auto Tf_HashImpl(HashState &h, T &&obj, ...)
188 -> decltype(hash_value(std::forward<T>(obj)), void())
189{
190 TfHashAppend(h, hash_value(std::forward<T>(obj)));
191}
192
193// TfHashAppend, attempted first.
194template <class HashState, class T>
195inline auto Tf_HashImpl(HashState &h, T &&obj, int)
196 -> decltype(TfHashAppend(h, std::forward<T>(obj)), void())
197{
198 TfHashAppend(h, std::forward<T>(obj));
199}
200
201// Implementation detail, CRTP base that provides the public interface for hash
202// state implementations.
203template <class Derived>
204class Tf_HashStateAPI
205{
206public:
207 // Append several objects to the hash state.
208 template <class... Args>
209 void Append(Args &&... args) {
210 _AppendImpl(std::forward<Args>(args)...);
211 }
212
213 // Append contiguous objects to the hash state.
214 template <class T>
215 void AppendContiguous(T const *elems, size_t numElems) {
216 this->_AsDerived()._AppendContiguous(elems, numElems);
217 }
218
219 // Append a range of objects to the hash state.
220 template <class Iter>
221 void AppendRange(Iter f, Iter l) {
222 this->_AsDerived()._AppendRange(f, l);
223 }
224
225 // Return the hash code for the current state.
226 size_t GetCode() const {
227 return this->_AsDerived()._GetCode();
228 }
229
230private:
231 template <class T, class... Args>
232 void _AppendImpl(T &&obj, Args &&... rest) {
233 this->_AsDerived()._Append(std::forward<T>(obj));
234 _AppendImpl(std::forward<Args>(rest)...);
235 }
236 void _AppendImpl() const {
237 // base case intentionally empty.
238 }
239
240 Derived &_AsDerived() {
241 return *static_cast<Derived *>(this);
242 }
243
244 Derived const &_AsDerived() const {
245 return *static_cast<Derived const *>(this);
246 }
247};
248
249// Implementation detail, accumulates hashes.
250class Tf_HashState : public Tf_HashStateAPI<Tf_HashState>
251{
252private:
253 friend class Tf_HashStateAPI<Tf_HashState>;
254
255 // Go thru Tf_HashImpl for non-integers.
256 template <class T>
257 std::enable_if_t<!std::is_integral<std::decay_t<T>>::value>
258 _Append(T &&obj) {
259 Tf_HashImpl(*this, std::forward<T>(obj), 0);
260 }
261
262 // Integers bottom out here.
263 template <class T>
264 std::enable_if_t<std::is_integral<T>::value>
265 _Append(T i) {
266 if (!_didOne) {
267 _state = i;
268 _didOne = true;
269 }
270 else {
271 _state = _Combine(_state, i);
272 }
273 }
274
275 // Append contiguous objects.
276 template <class T>
277 std::enable_if_t<std::is_integral<T>::value>
278 _AppendContiguous(T const *elems, size_t numElems) {
279 _AppendBytes(reinterpret_cast<char const *>(elems),
280 numElems * sizeof(T));
281 }
282
283 // Append contiguous objects.
284 template <class T>
285 std::enable_if_t<!std::is_integral<T>::value>
286 _AppendContiguous(T const *elems, size_t numElems) {
287 while (numElems--) {
288 Append(*elems++);
289 }
290 }
291
292 // Append a range.
293 template <class Iter>
294 void _AppendRange(Iter f, Iter l) {
295 while (f != l) {
296 _Append(*f++);
297 }
298 }
299
301 TF_API void _AppendBytes(char const *bytes, size_t numBytes);
302
303 // Return the hash code for the accumulated hash state.
304 size_t _GetCode() const {
305 // This is based on Knuth's multiplicative hash for integers. The
306 // constant is the closest prime to the binary expansion of the inverse
307 // golden ratio. The best way to produce a hash table bucket index from
308 // the result is to shift the result right, since the higher order bits
309 // have the most entropy. But since we can't know the number of buckets
310 // in a table that's using this, we just reverse the byte order instead,
311 // to get the highest entropy bits into the low-order bytes.
312 return _SwapByteOrder(_state * 11400714819323198549ULL);
313 }
314
315 // This turns into a single bswap/pshufb type instruction on most compilers.
316 inline uint64_t
317 _SwapByteOrder(uint64_t val) const {
318 val =
319 ((val & 0xFF00000000000000u) >> 56u) |
320 ((val & 0x00FF000000000000u) >> 40u) |
321 ((val & 0x0000FF0000000000u) >> 24u) |
322 ((val & 0x000000FF00000000u) >> 8u) |
323 ((val & 0x00000000FF000000u) << 8u) |
324 ((val & 0x0000000000FF0000u) << 24u) |
325 ((val & 0x000000000000FF00u) << 40u) |
326 ((val & 0x00000000000000FFu) << 56u);
327 return val;
328 }
329
330 size_t _Combine(size_t x, size_t y) const {
331 // This is our hash combiner. The task is, given two hash codes x and
332 // y, compute a single hash code. One way to do this is to exclusive-or
333 // the two codes together, but this can produce bad results if they
334 // differ by some fixed amount, For example if the input codes are
335 // multiples of 32, and the two codes we're given are N and N + 32 (this
336 // happens commonly when the hashed values are memory addresses) then
337 // the resulting hash codes for successive pairs of these produces many
338 // repeating values (32, 96, 32, XXX, 32, 96, 32, YYY...). That's a lot
339 // of collisions.
340 //
341 // Instead we combine hash values by assigning numbers to the lattice
342 // points in the plane, and then treating the inputs x and y as
343 // coordinates identifying a lattice point. Then the combined hash
344 // value is just the number assigned to the lattice point. This way
345 // each unique input pair (x, y) gets a unique output hash code.
346 //
347 // We number lattice points by triangular numbers like this:
348 //
349 // X 0 1 2 3 4 5
350 // Y
351 // 0 0 2 5 9 14 20
352 // 1 1 4 8 13 19 26
353 // 2 3 7 12 18 25 33
354 // 3 6 11 17 24 32 41
355 // 4 10 16 23 31 40 50
356 // 5 15 22 30 39 49 60
357 //
358 // This takes a couple of additions and a multiplication, which is a bit
359 // more expensive than something like an exclusive or, but the quality
360 // improvement outweighs the added expense.
361 x += y;
362 return y + x * (x + 1) / 2;
363 }
364
365 size_t _state = 0;
366 bool _didOne = false;
367};
368
460class TfHash {
461public:
464 template <class T>
465 auto operator()(T &&obj) const ->
466 decltype(Tf_HashImpl(std::declval<Tf_HashState &>(),
467 std::forward<T>(obj), 0), size_t()) {
468 Tf_HashState h;
469 Tf_HashImpl(h, std::forward<T>(obj), 0);
470 return h.GetCode();
471 }
472
474 template <class... Args>
475 static size_t Combine(Args &&... args) {
476 Tf_HashState h;
477 _CombineImpl(h, std::forward<Args>(args)...);
478 return h.GetCode();
479 }
480
481private:
482 template <class HashState, class T, class... Args>
483 static void _CombineImpl(HashState &h, T &&obj, Args &&... rest) {
484 Tf_HashImpl(h, std::forward<T>(obj), 0);
485 _CombineImpl(h, std::forward<Args>(rest)...);
486 }
487
488 template <class HashState>
489 static void _CombineImpl(HashState &h) {
490 // base case does nothing
491 }
492};
493
496 size_t operator()(const char* ptr) const;
497};
498
501 size_t operator()(const char* ptr) const;
502};
503
506 bool operator()(const char* lhs, const char* rhs) const;
507};
508
509PXR_NAMESPACE_CLOSE_SCOPE
510
511#endif
A user-extensible hashing mechanism for use with runtime hash tables.
Definition: hash.h:460
static size_t Combine(Args &&... args)
Produce a hash code by combining the hash codes of several objects.
Definition: hash.h:475
auto operator()(T &&obj) const -> decltype(Tf_HashImpl(std::declval< Tf_HashState & >(), std::forward< T >(obj), 0), size_t())
Produce a hash code for obj.
Definition: hash.h:465
A structure that wraps a char pointer, indicating intent that it should be hashed as a c-style null t...
Definition: hash.h:145
A function object that compares two c-strings for equality.
Definition: hash.h:505
A hash function object that hashes null-terminated c-string content.
Definition: hash.h:500
A hash function object that hashes the address of a char pointer.
Definition: hash.h:495
TfCStrHashWrapper TfHashAsCStr(char const *cstr)
Indicate that a char pointer is intended to be hashed as a C-style null terminated string.
Definition: hash.h:158
A file containing basic constants and definitions.
size_t hash_value(const TfToken &x)
Overload hash_value for TfToken.
Definition: token.h:437