This document is for a version of USD that is under development. See this page for the current release.
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
hash.h
Go to the documentation of this file.
1//
2// Copyright 2016 Pixar
3//
4// Licensed under the terms set forth in the LICENSE.txt file available at
5// https://openusd.org/license.
6//
7#ifndef PXR_BASE_TF_HASH_H
8#define PXR_BASE_TF_HASH_H
9
12
13#include "pxr/pxr.h"
14#include "pxr/base/tf/tf.h"
15#include "pxr/base/tf/api.h"
16
17#include <cstring>
18#include <string>
19#include <map>
20#include <memory>
21#include <set>
22#include <typeindex>
23#include <type_traits>
24#include <utility>
25#include <vector>
26
27PXR_NAMESPACE_OPEN_SCOPE
28
29// Support integers.
30template <class HashState, class T>
31std::enable_if_t<std::is_integral<T>::value>
32TfHashAppend(HashState &h, T integral)
33{
34 h.Append(integral);
35}
36
37// Simple metafunction that returns an unsigned integral type given a size in
38// bytes.
39template <size_t Size> struct Tf_SizedUnsignedInt;
40template <> struct Tf_SizedUnsignedInt<1> { using type = uint8_t; };
41template <> struct Tf_SizedUnsignedInt<2> { using type = uint16_t; };
42template <> struct Tf_SizedUnsignedInt<4> { using type = uint32_t; };
43template <> struct Tf_SizedUnsignedInt<8> { using type = uint64_t; };
44
45// Support enums.
46template <class HashState, class Enum>
47std::enable_if_t<std::is_enum<Enum>::value>
48TfHashAppend(HashState &h, Enum e)
49{
50 h.Append(static_cast<std::underlying_type_t<Enum>>(e));
51}
52
53// Support floating point.
54template <class HashState, class T>
55std::enable_if_t<std::is_floating_point<T>::value>
56TfHashAppend(HashState &h, T fp)
57{
58 // We want both positive and negative zero to hash the same, so we have to
59 // check against zero here.
60 typename Tf_SizedUnsignedInt<sizeof(T)>::type intbuf = 0;
61 if (fp != static_cast<T>(0)) {
62 memcpy(&intbuf, &fp, sizeof(T));
63 }
64 h.Append(intbuf);
65}
66
67// Support std::pair.
68template <class HashState, class T, class U>
69inline void
70TfHashAppend(HashState &h, std::pair<T, U> const &p)
71{
72 h.Append(p.first);
73 h.Append(p.second);
74}
75
76// Support std::vector. std::vector<bool> specialized below.
77template <class HashState, class T>
78inline void
79TfHashAppend(HashState &h, std::vector<T> const &vec)
80{
81 static_assert(!std::is_same_v<std::remove_cv_t<T>, bool>,
82 "Unexpected usage of vector of 'bool'."
83 "Expected explicit overload.");
84 h.AppendContiguous(vec.data(), vec.size());
85}
86
87// Support std::vector<bool>.
88template <class HashState>
89inline void
90TfHashAppend(HashState &h, std::vector<bool> const &vec)
91{
92 h.Append(std::hash<std::vector<bool>>{}(vec));
93}
94
95// Support std::set.
96// NOTE: Supporting std::unordered_set is complicated because the traversal
97// order is not guaranteed
98template <class HashState, class T, class Compare>
99inline void
100TfHashAppend(HashState& h, std::set<T, Compare> const &elements)
101{
102 h.AppendRange(std::begin(elements), std::end(elements));
103}
104
105// Support std::map.
106// NOTE: Supporting std::unordered_map is complicated because the traversal
107// order is not guaranteed
108template <class HashState, class Key, class Value, class Compare>
109inline void
110TfHashAppend(HashState& h, std::map<Key, Value, Compare> const &elements)
111{
112 h.AppendRange(std::begin(elements), std::end(elements));
113}
114
115// Support for hashing std::string.
116template <class HashState>
117inline void
118TfHashAppend(HashState &h, const std::string& s)
119{
120 return h.AppendContiguous(s.c_str(), s.length());
121}
122
123// Support for hashing pointers, but we explicitly delete the version for
124// [const] char pointers. See more below.
125template <class HashState, class T>
126inline void
127TfHashAppend(HashState &h, const T* ptr) {
128 return h.Append(reinterpret_cast<uintptr_t>(ptr));
129}
130
131// We refuse to hash [const] char *. You're almost certainly trying to hash the
132// pointed-to string and this will not do that (it will hash the pointer
133// itself). To hash a c-style null terminated string, you can use
134// TfHashAsCStr() to indicate the intent, or use TfHashCString. If you
135// really want to hash the pointer then use static_cast<const void*>(ptr) or
136// TfHashCharPtr.
137template <class HashState>
138inline void TfHashAppend(HashState &h, char const *ptr) = delete;
139template <class HashState>
140inline void TfHashAppend(HashState &h, char *ptr) = delete;
141
145{
146 explicit TfCStrHashWrapper(char const *cstr) : cstr(cstr) {}
147 char const *cstr;
148};
149
158TfHashAsCStr(char const *cstr)
159{
160 return TfCStrHashWrapper(cstr);
161}
162
163template <class HashState>
164inline void TfHashAppend(HashState &h, TfCStrHashWrapper hcstr)
165{
166 return h.AppendContiguous(hcstr.cstr, std::strlen(hcstr.cstr));
167}
168
169// Implementation detail: dispatch based on hash capability: Try TfHashAppend
170// first, otherwise try std::hash, followed by hash_value. We rely on a
171// combination of expression SFINAE and establishing preferred order by passing
172// a 0 constant and having the overloads take int (highest priority), long
173// (next priority) and '...' (lowest priority).
174
175// std::hash version, attempted second.
176template <class HashState, class T>
177inline auto Tf_HashImpl(HashState &h, T &&obj, long)
178 -> decltype(std::hash<typename std::decay<T>::type>()(
179 std::forward<T>(obj)), void())
180{
181 TfHashAppend(
182 h, std::hash<typename std::decay<T>::type>()(std::forward<T>(obj)));
183}
184
185// hash_value, attempted last.
186template <class HashState, class T>
187inline auto Tf_HashImpl(HashState &h, T &&obj, ...)
188 -> decltype(hash_value(std::forward<T>(obj)), void())
189{
190 TfHashAppend(h, hash_value(std::forward<T>(obj)));
191}
192
193// TfHashAppend, attempted first.
194template <class HashState, class T>
195inline auto Tf_HashImpl(HashState &h, T &&obj, int)
196 -> decltype(TfHashAppend(h, std::forward<T>(obj)), void())
197{
198 TfHashAppend(h, std::forward<T>(obj));
199}
200
201// Implementation detail, CRTP base that provides the public interface for hash
202// state implementations.
203template <class Derived>
204class Tf_HashStateAPI
205{
206public:
207 // Append several objects to the hash state.
208 template <class... Args>
209 void Append(Args &&... args) {
210 _AppendImpl(std::forward<Args>(args)...);
211 }
212
213 // Append contiguous objects to the hash state.
214 template <class T>
215 void AppendContiguous(T const *elems, size_t numElems) {
216 this->_AsDerived()._AppendContiguous(elems, numElems);
217 }
218
219 // Append a range of objects to the hash state.
220 template <class Iter>
221 void AppendRange(Iter f, Iter l) {
222 this->_AsDerived()._AppendRange(f, l);
223 }
224
225 // Return the hash code for the current state.
226 size_t GetCode() const {
227 return this->_AsDerived()._GetCode();
228 }
229
230private:
231 template <class T, class... Args>
232 void _AppendImpl(T &&obj, Args &&... rest) {
233 this->_AsDerived()._Append(std::forward<T>(obj));
234 _AppendImpl(std::forward<Args>(rest)...);
235 }
236 void _AppendImpl() const {
237 // base case intentionally empty.
238 }
239
240 Derived &_AsDerived() {
241 return *static_cast<Derived *>(this);
242 }
243
244 Derived const &_AsDerived() const {
245 return *static_cast<Derived const *>(this);
246 }
247};
248
249// Implementation detail, accumulates hashes.
250class Tf_HashState : public Tf_HashStateAPI<Tf_HashState>
251{
252private:
253 friend class Tf_HashStateAPI<Tf_HashState>;
254
255 // Go thru Tf_HashImpl for non-integers.
256 template <class T>
257 std::enable_if_t<!std::is_integral<std::decay_t<T>>::value>
258 _Append(T &&obj) {
259 Tf_HashImpl(*this, std::forward<T>(obj), 0);
260 }
261
262 // Integers bottom out here.
263 template <class T>
264 std::enable_if_t<std::is_integral<T>::value>
265 _Append(T i) {
266 if (!_didOne) {
267 _state = i;
268 _didOne = true;
269 }
270 else {
271 _state = _Combine(_state, i);
272 }
273 }
274
275 // Append contiguous objects.
276 template <class T>
277 std::enable_if_t<std::is_integral<T>::value>
278 _AppendContiguous(T const *elems, size_t numElems) {
279 _AppendBytes(reinterpret_cast<char const *>(elems),
280 numElems * sizeof(T));
281 }
282
283 // Append contiguous objects.
284 template <class T>
285 std::enable_if_t<!std::is_integral<T>::value>
286 _AppendContiguous(T const *elems, size_t numElems) {
287 while (numElems--) {
288 Append(*elems++);
289 }
290 }
291
292 // Append a range.
293 template <class Iter>
294 void _AppendRange(Iter f, Iter l) {
295 while (f != l) {
296 _Append(*f++);
297 }
298 }
299
301 TF_API void _AppendBytes(char const *bytes, size_t numBytes);
302
303 // Return the hash code for the accumulated hash state.
304 size_t _GetCode() const {
305 // This is based on Knuth's multiplicative hash for integers. The
306 // constant is the closest prime to the binary expansion of the inverse
307 // golden ratio. The best way to produce a hash table bucket index from
308 // the result is to shift the result right, since the higher order bits
309 // have the most entropy. But since we can't know the number of buckets
310 // in a table that's using this, we just reverse the byte order instead,
311 // to get the highest entropy bits into the low-order bytes.
312 return _SwapByteOrder(_state * 11400714819323198549ULL);
313 }
314
315 // This turns into a single bswap/pshufb type instruction on most compilers.
316 inline uint64_t
317 _SwapByteOrder(uint64_t val) const {
318 val =
319 ((val & 0xFF00000000000000u) >> 56u) |
320 ((val & 0x00FF000000000000u) >> 40u) |
321 ((val & 0x0000FF0000000000u) >> 24u) |
322 ((val & 0x000000FF00000000u) >> 8u) |
323 ((val & 0x00000000FF000000u) << 8u) |
324 ((val & 0x0000000000FF0000u) << 24u) |
325 ((val & 0x000000000000FF00u) << 40u) |
326 ((val & 0x00000000000000FFu) << 56u);
327 return val;
328 }
329
330 size_t _Combine(size_t x, size_t y) const {
331 // This is our hash combiner. The task is, given two hash codes x and
332 // y, compute a single hash code. One way to do this is to exclusive-or
333 // the two codes together, but this can produce bad results if they
334 // differ by some fixed amount, For example if the input codes are
335 // multiples of 32, and the two codes we're given are N and N + 32 (this
336 // happens commonly when the hashed values are memory addresses) then
337 // the resulting hash codes for successive pairs of these produces many
338 // repeating values (32, 96, 32, XXX, 32, 96, 32, YYY...). That's a lot
339 // of collisions.
340 //
341 // Instead we combine hash values by assigning numbers to the lattice
342 // points in the plane, and then treating the inputs x and y as
343 // coordinates identifying a lattice point. Then the combined hash
344 // value is just the number assigned to the lattice point. This way
345 // each unique input pair (x, y) gets a unique output hash code.
346 //
347 // We number lattice points by triangular numbers like this:
348 //
349 // X 0 1 2 3 4 5
350 // Y
351 // 0 0 2 5 9 14 20
352 // 1 1 4 8 13 19 26
353 // 2 3 7 12 18 25 33
354 // 3 6 11 17 24 32 41
355 // 4 10 16 23 31 40 50
356 // 5 15 22 30 39 49 60
357 //
358 // This takes a couple of additions and a multiplication, which is a bit
359 // more expensive than something like an exclusive or, but the quality
360 // improvement outweighs the added expense.
361 x += y;
362 return y + x * (x + 1) / 2;
363 }
364
365 size_t _state = 0;
366 bool _didOne = false;
367};
368
460class TfHash {
461public:
464 template <class T>
465 auto operator()(T &&obj) const ->
466 decltype(Tf_HashImpl(std::declval<Tf_HashState &>(),
467 std::forward<T>(obj), 0), size_t()) {
468 Tf_HashState h;
469 Tf_HashImpl(h, std::forward<T>(obj), 0);
470 return h.GetCode();
471 }
472
474 template <class... Args>
475 static size_t Combine(Args &&... args) {
476 Tf_HashState h;
477 _CombineImpl(h, std::forward<Args>(args)...);
478 return h.GetCode();
479 }
480
481private:
482 template <class HashState, class T, class... Args>
483 static void _CombineImpl(HashState &h, T &&obj, Args &&... rest) {
484 Tf_HashImpl(h, std::forward<T>(obj), 0);
485 _CombineImpl(h, std::forward<Args>(rest)...);
486 }
487
488 template <class HashState>
489 static void _CombineImpl(HashState &h) {
490 // base case does nothing
491 }
492};
493
496 size_t operator()(const char* ptr) const;
497};
498
501 size_t operator()(const char* ptr) const;
502};
503
506 bool operator()(const char* lhs, const char* rhs) const;
507};
508
509PXR_NAMESPACE_CLOSE_SCOPE
510
511#endif
A user-extensible hashing mechanism for use with runtime hash tables.
Definition: hash.h:460
static size_t Combine(Args &&... args)
Produce a hash code by combining the hash codes of several objects.
Definition: hash.h:475
auto operator()(T &&obj) const -> decltype(Tf_HashImpl(std::declval< Tf_HashState & >(), std::forward< T >(obj), 0), size_t())
Produce a hash code for obj.
Definition: hash.h:465
A structure that wraps a char pointer, indicating intent that it should be hashed as a c-style null t...
Definition: hash.h:145
A function object that compares two c-strings for equality.
Definition: hash.h:505
A hash function object that hashes null-terminated c-string content.
Definition: hash.h:500
A hash function object that hashes the address of a char pointer.
Definition: hash.h:495
TfCStrHashWrapper TfHashAsCStr(char const *cstr)
Indicate that a char pointer is intended to be hashed as a C-style null terminated string.
Definition: hash.h:158
A file containing basic constants and definitions.
size_t hash_value(const TfToken &x)
Overload hash_value for TfToken.
Definition: token.h:437