This document is for a version of USD that is under development. See this page for the current release.
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
hash.h
Go to the documentation of this file.
1//
2// Copyright 2016 Pixar
3//
4// Licensed under the terms set forth in the LICENSE.txt file available at
5// https://openusd.org/license.
6//
7#ifndef PXR_BASE_TF_HASH_H
8#define PXR_BASE_TF_HASH_H
9
12
13#include "pxr/pxr.h"
14#include "pxr/base/tf/tf.h"
15#include "pxr/base/tf/api.h"
16
17#include <cstring>
18#include <string>
19#include <map>
20#include <memory>
21#include <set>
22#include <tuple>
23#include <typeindex>
24#include <type_traits>
25#include <utility>
26#include <vector>
27
28PXR_NAMESPACE_OPEN_SCOPE
29
30// Support integers.
31template <class HashState, class T>
32std::enable_if_t<std::is_integral<T>::value>
33TfHashAppend(HashState &h, T integral)
34{
35 h.Append(integral);
36}
37
38// Simple metafunction that returns an unsigned integral type given a size in
39// bytes.
40template <size_t Size> struct Tf_SizedUnsignedInt;
41template <> struct Tf_SizedUnsignedInt<1> { using type = uint8_t; };
42template <> struct Tf_SizedUnsignedInt<2> { using type = uint16_t; };
43template <> struct Tf_SizedUnsignedInt<4> { using type = uint32_t; };
44template <> struct Tf_SizedUnsignedInt<8> { using type = uint64_t; };
45
46// Support enums.
47template <class HashState, class Enum>
48std::enable_if_t<std::is_enum<Enum>::value>
49TfHashAppend(HashState &h, Enum e)
50{
51 h.Append(static_cast<std::underlying_type_t<Enum>>(e));
52}
53
54// Support floating point.
55template <class HashState, class T>
56std::enable_if_t<std::is_floating_point<T>::value>
57TfHashAppend(HashState &h, T fp)
58{
59 // We want both positive and negative zero to hash the same, so we have to
60 // check against zero here.
61 typename Tf_SizedUnsignedInt<sizeof(T)>::type intbuf = 0;
62 if (fp != static_cast<T>(0)) {
63 memcpy(&intbuf, &fp, sizeof(T));
64 }
65 h.Append(intbuf);
66}
67
68// Support std::pair.
69template <class HashState, class T, class U>
70inline void
71TfHashAppend(HashState &h, std::pair<T, U> const &p)
72{
73 h.Append(p.first);
74 h.Append(p.second);
75}
76
77// Support std::tuple.
78template <class HashState, class... T>
79inline void
80TfHashAppend(HashState &h, std::tuple<T...> const &t)
81{
82 // XXX:
83 // This gives the same hash for a std::pair<T, U> and std::tuple<T, U>
84 // which may not be ideal in some cases. See USD-10578.
85 std::apply([&h](auto const&... v) { h.Append(v...); }, t);
86}
87
88// Support std::vector. std::vector<bool> specialized below.
89template <class HashState, class T>
90inline void
91TfHashAppend(HashState &h, std::vector<T> const &vec)
92{
93 static_assert(!std::is_same_v<std::remove_cv_t<T>, bool>,
94 "Unexpected usage of vector of 'bool'."
95 "Expected explicit overload.");
96 h.AppendContiguous(vec.data(), vec.size());
97}
98
99// Support std::vector<bool>.
100template <class HashState>
101inline void
102TfHashAppend(HashState &h, std::vector<bool> const &vec)
103{
104 h.Append(std::hash<std::vector<bool>>{}(vec));
105}
106
107// Support std::set.
108// NOTE: Supporting std::unordered_set is complicated because the traversal
109// order is not guaranteed
110template <class HashState, class T, class Compare>
111inline void
112TfHashAppend(HashState& h, std::set<T, Compare> const &elements)
113{
114 h.AppendRange(std::begin(elements), std::end(elements));
115}
116
117// Support std::map.
118// NOTE: Supporting std::unordered_map is complicated because the traversal
119// order is not guaranteed
120template <class HashState, class Key, class Value, class Compare>
121inline void
122TfHashAppend(HashState& h, std::map<Key, Value, Compare> const &elements)
123{
124 h.AppendRange(std::begin(elements), std::end(elements));
125}
126
127// Support for hashing std::string.
128template <class HashState>
129inline void
130TfHashAppend(HashState &h, const std::string& s)
131{
132 return h.AppendContiguous(s.c_str(), s.length());
133}
134
135// Support for hashing pointers, but we explicitly delete the version for
136// [const] char pointers. See more below.
137template <class HashState, class T>
138inline void
139TfHashAppend(HashState &h, const T* ptr) {
140 return h.Append(reinterpret_cast<uintptr_t>(ptr));
141}
142
143// We refuse to hash [const] char *. You're almost certainly trying to hash the
144// pointed-to string and this will not do that (it will hash the pointer
145// itself). To hash a c-style null terminated string, you can use
146// TfHashAsCStr() to indicate the intent, or use TfHashCString. If you
147// really want to hash the pointer then use static_cast<const void*>(ptr) or
148// TfHashCharPtr.
149template <class HashState>
150inline void TfHashAppend(HashState &h, char const *ptr) = delete;
151template <class HashState>
152inline void TfHashAppend(HashState &h, char *ptr) = delete;
153
157{
158 explicit TfCStrHashWrapper(char const *cstr) : cstr(cstr) {}
159 char const *cstr;
160};
161
170TfHashAsCStr(char const *cstr)
171{
172 return TfCStrHashWrapper(cstr);
173}
174
175template <class HashState>
176inline void TfHashAppend(HashState &h, TfCStrHashWrapper hcstr)
177{
178 return h.AppendContiguous(hcstr.cstr, std::strlen(hcstr.cstr));
179}
180
181// Implementation detail: dispatch based on hash capability: Try TfHashAppend
182// first, otherwise try std::hash, followed by hash_value. We rely on a
183// combination of expression SFINAE and establishing preferred order by passing
184// a 0 constant and having the overloads take int (highest priority), long
185// (next priority) and '...' (lowest priority).
186
187// std::hash version, attempted second.
188template <class HashState, class T>
189inline auto Tf_HashImpl(HashState &h, T &&obj, long)
190 -> decltype(std::hash<typename std::decay<T>::type>()(
191 std::forward<T>(obj)), void())
192{
193 TfHashAppend(
194 h, std::hash<typename std::decay<T>::type>()(std::forward<T>(obj)));
195}
196
197// hash_value, attempted last.
198template <class HashState, class T>
199inline auto Tf_HashImpl(HashState &h, T &&obj, ...)
200 -> decltype(hash_value(std::forward<T>(obj)), void())
201{
202 TfHashAppend(h, hash_value(std::forward<T>(obj)));
203}
204
205// TfHashAppend, attempted first.
206template <class HashState, class T>
207inline auto Tf_HashImpl(HashState &h, T &&obj, int)
208 -> decltype(TfHashAppend(h, std::forward<T>(obj)), void())
209{
210 TfHashAppend(h, std::forward<T>(obj));
211}
212
213// Implementation detail, CRTP base that provides the public interface for hash
214// state implementations.
215template <class Derived>
216class Tf_HashStateAPI
217{
218public:
219 // Append several objects to the hash state.
220 template <class... Args>
221 void Append(Args &&... args) {
222 _AppendImpl(std::forward<Args>(args)...);
223 }
224
225 // Append contiguous objects to the hash state.
226 template <class T>
227 void AppendContiguous(T const *elems, size_t numElems) {
228 this->_AsDerived()._AppendContiguous(elems, numElems);
229 }
230
231 // Append a range of objects to the hash state.
232 template <class Iter>
233 void AppendRange(Iter f, Iter l) {
234 this->_AsDerived()._AppendRange(f, l);
235 }
236
237 // Return the hash code for the current state.
238 size_t GetCode() const {
239 return this->_AsDerived()._GetCode();
240 }
241
242private:
243 template <class T, class... Args>
244 void _AppendImpl(T &&obj, Args &&... rest) {
245 this->_AsDerived()._Append(std::forward<T>(obj));
246 _AppendImpl(std::forward<Args>(rest)...);
247 }
248 void _AppendImpl() const {
249 // base case intentionally empty.
250 }
251
252 Derived &_AsDerived() {
253 return *static_cast<Derived *>(this);
254 }
255
256 Derived const &_AsDerived() const {
257 return *static_cast<Derived const *>(this);
258 }
259};
260
261// Implementation detail, accumulates hashes.
262class Tf_HashState : public Tf_HashStateAPI<Tf_HashState>
263{
264private:
265 friend class Tf_HashStateAPI<Tf_HashState>;
266
267 // Go thru Tf_HashImpl for non-integers.
268 template <class T>
269 std::enable_if_t<!std::is_integral<std::decay_t<T>>::value>
270 _Append(T &&obj) {
271 Tf_HashImpl(*this, std::forward<T>(obj), 0);
272 }
273
274 // Integers bottom out here.
275 template <class T>
276 std::enable_if_t<std::is_integral<T>::value>
277 _Append(T i) {
278 if (!_didOne) {
279 _state = i;
280 _didOne = true;
281 }
282 else {
283 _state = _Combine(_state, i);
284 }
285 }
286
287 // Append contiguous objects.
288 template <class T>
289 std::enable_if_t<std::is_integral<T>::value>
290 _AppendContiguous(T const *elems, size_t numElems) {
291 _AppendBytes(reinterpret_cast<char const *>(elems),
292 numElems * sizeof(T));
293 }
294
295 // Append contiguous objects.
296 template <class T>
297 std::enable_if_t<!std::is_integral<T>::value>
298 _AppendContiguous(T const *elems, size_t numElems) {
299 while (numElems--) {
300 Append(*elems++);
301 }
302 }
303
304 // Append a range.
305 template <class Iter>
306 void _AppendRange(Iter f, Iter l) {
307 while (f != l) {
308 _Append(*f++);
309 }
310 }
311
313 TF_API void _AppendBytes(char const *bytes, size_t numBytes);
314
315 // Return the hash code for the accumulated hash state.
316 size_t _GetCode() const {
317 // This is based on Knuth's multiplicative hash for integers. The
318 // constant is the closest prime to the binary expansion of the inverse
319 // golden ratio. The best way to produce a hash table bucket index from
320 // the result is to shift the result right, since the higher order bits
321 // have the most entropy. But since we can't know the number of buckets
322 // in a table that's using this, we just reverse the byte order instead,
323 // to get the highest entropy bits into the low-order bytes.
324 return _SwapByteOrder(_state * 11400714819323198549ULL);
325 }
326
327 // This turns into a single bswap/pshufb type instruction on most compilers.
328 inline uint64_t
329 _SwapByteOrder(uint64_t val) const {
330 val =
331 ((val & 0xFF00000000000000u) >> 56u) |
332 ((val & 0x00FF000000000000u) >> 40u) |
333 ((val & 0x0000FF0000000000u) >> 24u) |
334 ((val & 0x000000FF00000000u) >> 8u) |
335 ((val & 0x00000000FF000000u) << 8u) |
336 ((val & 0x0000000000FF0000u) << 24u) |
337 ((val & 0x000000000000FF00u) << 40u) |
338 ((val & 0x00000000000000FFu) << 56u);
339 return val;
340 }
341
342 size_t _Combine(size_t x, size_t y) const {
343 // This is our hash combiner. The task is, given two hash codes x and
344 // y, compute a single hash code. One way to do this is to exclusive-or
345 // the two codes together, but this can produce bad results if they
346 // differ by some fixed amount, For example if the input codes are
347 // multiples of 32, and the two codes we're given are N and N + 32 (this
348 // happens commonly when the hashed values are memory addresses) then
349 // the resulting hash codes for successive pairs of these produces many
350 // repeating values (32, 96, 32, XXX, 32, 96, 32, YYY...). That's a lot
351 // of collisions.
352 //
353 // Instead we combine hash values by assigning numbers to the lattice
354 // points in the plane, and then treating the inputs x and y as
355 // coordinates identifying a lattice point. Then the combined hash
356 // value is just the number assigned to the lattice point. This way
357 // each unique input pair (x, y) gets a unique output hash code.
358 //
359 // We number lattice points by triangular numbers like this:
360 //
361 // X 0 1 2 3 4 5
362 // Y
363 // 0 0 2 5 9 14 20
364 // 1 1 4 8 13 19 26
365 // 2 3 7 12 18 25 33
366 // 3 6 11 17 24 32 41
367 // 4 10 16 23 31 40 50
368 // 5 15 22 30 39 49 60
369 //
370 // This takes a couple of additions and a multiplication, which is a bit
371 // more expensive than something like an exclusive or, but the quality
372 // improvement outweighs the added expense.
373 x += y;
374 return y + x * (x + 1) / 2;
375 }
376
377 size_t _state = 0;
378 bool _didOne = false;
379};
380
472class TfHash {
473public:
476 template <class T>
477 auto operator()(T &&obj) const ->
478 decltype(Tf_HashImpl(std::declval<Tf_HashState &>(),
479 std::forward<T>(obj), 0), size_t()) {
480 Tf_HashState h;
481 Tf_HashImpl(h, std::forward<T>(obj), 0);
482 return h.GetCode();
483 }
484
486 template <class... Args>
487 static size_t Combine(Args &&... args) {
488 Tf_HashState h;
489 _CombineImpl(h, std::forward<Args>(args)...);
490 return h.GetCode();
491 }
492
493private:
494 template <class HashState, class T, class... Args>
495 static void _CombineImpl(HashState &h, T &&obj, Args &&... rest) {
496 Tf_HashImpl(h, std::forward<T>(obj), 0);
497 _CombineImpl(h, std::forward<Args>(rest)...);
498 }
499
500 template <class HashState>
501 static void _CombineImpl(HashState &h) {
502 // base case does nothing
503 }
504};
505
508 size_t operator()(const char* ptr) const;
509};
510
513 size_t operator()(const char* ptr) const;
514};
515
518 bool operator()(const char* lhs, const char* rhs) const;
519};
520
521PXR_NAMESPACE_CLOSE_SCOPE
522
523#endif
A user-extensible hashing mechanism for use with runtime hash tables.
Definition: hash.h:472
static size_t Combine(Args &&... args)
Produce a hash code by combining the hash codes of several objects.
Definition: hash.h:487
auto operator()(T &&obj) const -> decltype(Tf_HashImpl(std::declval< Tf_HashState & >(), std::forward< T >(obj), 0), size_t())
Produce a hash code for obj.
Definition: hash.h:477
A structure that wraps a char pointer, indicating intent that it should be hashed as a c-style null t...
Definition: hash.h:157
A function object that compares two c-strings for equality.
Definition: hash.h:517
A hash function object that hashes null-terminated c-string content.
Definition: hash.h:512
A hash function object that hashes the address of a char pointer.
Definition: hash.h:507
TfCStrHashWrapper TfHashAsCStr(char const *cstr)
Indicate that a char pointer is intended to be hashed as a C-style null terminated string.
Definition: hash.h:170
A file containing basic constants and definitions.
size_t hash_value(const TfToken &x)
Overload hash_value for TfToken.
Definition: token.h:437