hash.h
Go to the documentation of this file.
1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the Apache License, Version 2.0 (the "Apache License")
5 // with the following modification; you may not use this file except in
6 // compliance with the Apache License and the following modification to it:
7 // Section 6. Trademarks. is deleted and replaced with:
8 //
9 // 6. Trademarks. This License does not grant permission to use the trade
10 // names, trademarks, service marks, or product names of the Licensor
11 // and its affiliates, except as required to comply with Section 4(c) of
12 // the License and to reproduce the content of the NOTICE file.
13 //
14 // You may obtain a copy of the Apache License at
15 //
16 // http://www.apache.org/licenses/LICENSE-2.0
17 //
18 // Unless required by applicable law or agreed to in writing, software
19 // distributed under the Apache License with the above modification is
20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, either express or implied. See the Apache License for the specific
22 // language governing permissions and limitations under the Apache License.
23 //
24 #ifndef PXR_BASE_TF_HASH_H
25 #define PXR_BASE_TF_HASH_H
26 
29 
30 #include "pxr/pxr.h"
31 #include "pxr/base/tf/tf.h"
32 #include "pxr/base/tf/api.h"
33 
34 #include <cstring>
35 #include <string>
36 #include <map>
37 #include <memory>
38 #include <set>
39 #include <typeindex>
40 #include <type_traits>
41 #include <utility>
42 #include <vector>
43 
44 PXR_NAMESPACE_OPEN_SCOPE
45 
46 // Support integers.
47 template <class HashState, class T>
48 std::enable_if_t<std::is_integral<T>::value>
49 TfHashAppend(HashState &h, T integral)
50 {
51  h.Append(integral);
52 }
53 
54 // Simple metafunction that returns an unsigned integral type given a size in
55 // bytes.
56 template <size_t Size> struct Tf_SizedUnsignedInt;
57 template <> struct Tf_SizedUnsignedInt<1> { using type = uint8_t; };
58 template <> struct Tf_SizedUnsignedInt<2> { using type = uint16_t; };
59 template <> struct Tf_SizedUnsignedInt<4> { using type = uint32_t; };
60 template <> struct Tf_SizedUnsignedInt<8> { using type = uint64_t; };
61 
62 // Support enums.
63 template <class HashState, class Enum>
64 std::enable_if_t<std::is_enum<Enum>::value>
65 TfHashAppend(HashState &h, Enum e)
66 {
67  h.Append(static_cast<std::underlying_type_t<Enum>>(e));
68 }
69 
70 // Support floating point.
71 template <class HashState, class T>
72 std::enable_if_t<std::is_floating_point<T>::value>
73 TfHashAppend(HashState &h, T fp)
74 {
75  // We want both positive and negative zero to hash the same, so we have to
76  // check against zero here.
77  typename Tf_SizedUnsignedInt<sizeof(T)>::type intbuf = 0;
78  if (fp != static_cast<T>(0)) {
79  memcpy(&intbuf, &fp, sizeof(T));
80  }
81  h.Append(intbuf);
82 }
83 
84 // Support std::pair.
85 template <class HashState, class T, class U>
86 inline void
87 TfHashAppend(HashState &h, std::pair<T, U> const &p)
88 {
89  h.Append(p.first);
90  h.Append(p.second);
91 }
92 
93 // Support std::vector. std::vector<bool> specialized below.
94 template <class HashState, class T>
95 inline std::enable_if_t<!std::is_same<std::remove_const_t<T>, bool>::value>
96 TfHashAppend(HashState &h, std::vector<T> const &vec)
97 {
98  h.AppendContiguous(vec.data(), vec.size());
99 }
100 
101 // Support std::vector<bool>.
102 template <class HashState>
103 inline void
104 TfHashAppend(HashState &h, std::vector<bool> const &vec)
105 {
106  h.Append(std::hash<std::vector<bool>>{}(vec));
107 }
108 
109 // Support std::set.
110 // NOTE: Supporting std::unordered_set is complicated because the traversal
111 // order is not guaranteed
112 template <class HashState, class T, class Compare>
113 inline void
114 TfHashAppend(HashState& h, std::set<T, Compare> const &elements)
115 {
116  h.AppendRange(std::begin(elements), std::end(elements));
117 }
118 
119 // Support std::map.
120 // NOTE: Supporting std::unordered_map is complicated because the traversal
121 // order is not guaranteed
122 template <class HashState, class Key, class Value, class Compare>
123 inline void
124 TfHashAppend(HashState& h, std::map<Key, Value, Compare> const &elements)
125 {
126  h.AppendRange(std::begin(elements), std::end(elements));
127 }
128 
129 // Support std::type_index. When TfHash support for std::hash is enabled,
130 // this explicit specialization will no longer be necessary.
131 template <class HashState>
132 inline void
133 TfHashAppend(HashState& h, std::type_index const &type_index)
134 {
135  return h.Append(type_index.hash_code());
136 }
137 
138 // Support for hashing std::string.
139 template <class HashState>
140 inline void
141 TfHashAppend(HashState &h, const std::string& s)
142 {
143  return h.AppendContiguous(s.c_str(), s.length());
144 }
145 
146 // Support for hashing pointers, but we explicitly delete the version for
147 // [const] char pointers. See more below.
148 template <class HashState, class T>
149 inline void
150 TfHashAppend(HashState &h, const T* ptr) {
151  return h.Append(reinterpret_cast<uintptr_t>(ptr));
152 }
153 
154 // Support for hashing std::shared_ptr. When TfHash support for std::hash is
155 // enabled, this explicit specialization will no longer be necessary.
156 template <class HashState, class T>
157 inline void
158 TfHashAppend(HashState &h, const std::shared_ptr<T>& ptr) {
159  h.Append(std::hash<std::shared_ptr<T>>{}(ptr));
160 }
161 
162 // Support for hashing std::unique_ptr. When TfHash support for std::hash is
163 // enabled, this explicit specialization will no longer be necessary.
164 template <class HashState, class T>
165 inline void
166 TfHashAppend(HashState &h, const std::unique_ptr<T>& ptr) {
167  h.Append(std::hash<std::unique_ptr<T>>{}(ptr));
168 }
169 
170 // We refuse to hash [const] char *. You're almost certainly trying to hash the
171 // pointed-to string and this will not do that (it will hash the pointer
172 // itself). To hash a c-style null terminated string, you can use
173 // TfHashAsCStr() to indicate the intent, or use TfHashCString. If you
174 // really want to hash the pointer then use static_cast<const void*>(ptr) or
175 // TfHashCharPtr.
176 template <class HashState>
177 inline void TfHashAppend(HashState &h, char const *ptr) = delete;
178 template <class HashState>
179 inline void TfHashAppend(HashState &h, char *ptr) = delete;
180 
184 {
185  explicit TfCStrHashWrapper(char const *cstr) : cstr(cstr) {}
186  char const *cstr;
187 };
188 
196 inline TfCStrHashWrapper
197 TfHashAsCStr(char const *cstr)
198 {
199  return TfCStrHashWrapper(cstr);
200 }
201 
202 template <class HashState>
203 inline void TfHashAppend(HashState &h, TfCStrHashWrapper hcstr)
204 {
205  return h.AppendContiguous(hcstr.cstr, std::strlen(hcstr.cstr));
206 }
207 
208 // Implementation detail: dispatch based on hash capability: Try TfHashAppend
209 // first, otherwise try hash_value. We'd like to otherwise try std::hash<T>,
210 // but std::hash<> is not SFINAE-friendly until c++17 and this code needs to
211 // support c++14 currently. We rely on a combination of expression SFINAE and
212 // establishing preferred order by passing a 0 constant and having the overloads
213 // take int (highest priority), long (next priority) and '...' (lowest
214 // priority).
215 
216 // std::hash version, attempted last. Consider adding when we move to
217 // C++17 or newer.
218 /*
219 template <class HashState, class T>
220 inline auto Tf_HashImpl(HashState &h, T &&obj, ...)
221  -> decltype(std::hash<typename std::decay<T>::type>()(
222  std::forward<T>(obj)), void())
223 {
224  TfHashAppend(
225  h, std::hash<typename std::decay<T>::type>()(std::forward<T>(obj)));
226 }
227 */
228 
229 // hash_value, attempted second.
230 template <class HashState, class T>
231 inline auto Tf_HashImpl(HashState &h, T &&obj, long)
232  -> decltype(hash_value(std::forward<T>(obj)), void())
233 {
234  TfHashAppend(h, hash_value(std::forward<T>(obj)));
235 }
236 
237 // TfHashAppend, attempted first.
238 template <class HashState, class T>
239 inline auto Tf_HashImpl(HashState &h, T &&obj, int)
240  -> decltype(TfHashAppend(h, std::forward<T>(obj)), void())
241 {
242  TfHashAppend(h, std::forward<T>(obj));
243 }
244 
245 // Implementation detail, CRTP base that provides the public interface for hash
246 // state implementations.
247 template <class Derived>
248 class Tf_HashStateAPI
249 {
250 public:
251  // Append several objects to the hash state.
252  template <class... Args>
253  void Append(Args &&... args) {
254  _AppendImpl(std::forward<Args>(args)...);
255  }
256 
257  // Append contiguous objects to the hash state.
258  template <class T>
259  void AppendContiguous(T const *elems, size_t numElems) {
260  this->_AsDerived()._AppendContiguous(elems, numElems);
261  }
262 
263  // Append a range of objects to the hash state.
264  template <class Iter>
265  void AppendRange(Iter f, Iter l) {
266  this->_AsDerived()._AppendRange(f, l);
267  }
268 
269  // Return the hash code for the current state.
270  size_t GetCode() const {
271  return this->_AsDerived()._GetCode();
272  }
273 
274 private:
275  template <class T, class... Args>
276  void _AppendImpl(T &&obj, Args &&... rest) {
277  this->_AsDerived()._Append(std::forward<T>(obj));
278  _AppendImpl(std::forward<Args>(rest)...);
279  }
280  void _AppendImpl() const {
281  // base case intentionally empty.
282  }
283 
284  Derived &_AsDerived() {
285  return *static_cast<Derived *>(this);
286  }
287 
288  Derived const &_AsDerived() const {
289  return *static_cast<Derived const *>(this);
290  }
291 };
292 
293 // Implementation detail, accumulates hashes.
294 class Tf_HashState : public Tf_HashStateAPI<Tf_HashState>
295 {
296 private:
297  friend class Tf_HashStateAPI<Tf_HashState>;
298 
299  // Go thru Tf_HashImpl for non-integers.
300  template <class T>
301  std::enable_if_t<!std::is_integral<std::decay_t<T>>::value>
302  _Append(T &&obj) {
303  Tf_HashImpl(*this, std::forward<T>(obj), 0);
304  }
305 
306  // Integers bottom out here.
307  template <class T>
308  std::enable_if_t<std::is_integral<T>::value>
309  _Append(T i) {
310  if (!_didOne) {
311  _state = i;
312  _didOne = true;
313  }
314  else {
315  _state = _Combine(_state, i);
316  }
317  }
318 
319  // Append contiguous objects.
320  template <class T>
321  std::enable_if_t<std::is_integral<T>::value>
322  _AppendContiguous(T const *elems, size_t numElems) {
323  _AppendBytes(reinterpret_cast<char const *>(elems),
324  numElems * sizeof(T));
325  }
326 
327  // Append contiguous objects.
328  template <class T>
329  std::enable_if_t<!std::is_integral<T>::value>
330  _AppendContiguous(T const *elems, size_t numElems) {
331  while (numElems--) {
332  Append(*elems++);
333  }
334  }
335 
336  // Append a range.
337  template <class Iter>
338  void _AppendRange(Iter f, Iter l) {
339  while (f != l) {
340  _Append(*f++);
341  }
342  }
343 
345  TF_API void _AppendBytes(char const *bytes, size_t numBytes);
346 
347  // Return the hash code for the accumulated hash state.
348  size_t _GetCode() const {
349  // This is based on Knuth's multiplicative hash for integers. The
350  // constant is the closest prime to the binary expansion of the inverse
351  // golden ratio. The best way to produce a hash table bucket index from
352  // the result is to shift the result right, since the higher order bits
353  // have the most entropy. But since we can't know the number of buckets
354  // in a table that's using this, we just reverse the byte order instead,
355  // to get the highest entropy bits into the low-order bytes.
356  return _SwapByteOrder(_state * 11400714819323198549ULL);
357  }
358 
359  // This turns into a single bswap/pshufb type instruction on most compilers.
360  inline uint64_t
361  _SwapByteOrder(uint64_t val) const {
362  val =
363  ((val & 0xFF00000000000000u) >> 56u) |
364  ((val & 0x00FF000000000000u) >> 40u) |
365  ((val & 0x0000FF0000000000u) >> 24u) |
366  ((val & 0x000000FF00000000u) >> 8u) |
367  ((val & 0x00000000FF000000u) << 8u) |
368  ((val & 0x0000000000FF0000u) << 24u) |
369  ((val & 0x000000000000FF00u) << 40u) |
370  ((val & 0x00000000000000FFu) << 56u);
371  return val;
372  }
373 
374  size_t _Combine(size_t x, size_t y) const {
375  // This is our hash combiner. The task is, given two hash codes x and
376  // y, compute a single hash code. One way to do this is to exclusive-or
377  // the two codes together, but this can produce bad results if they
378  // differ by some fixed amount, For example if the input codes are
379  // multiples of 32, and the two codes we're given are N and N + 32 (this
380  // happens commonly when the hashed values are memory addresses) then
381  // the resulting hash codes for successive pairs of these produces many
382  // repeating values (32, 96, 32, XXX, 32, 96, 32, YYY...). That's a lot
383  // of collisions.
384  //
385  // Instead we combine hash values by assigning numbers to the lattice
386  // points in the plane, and then treating the inputs x and y as
387  // coordinates identifying a lattice point. Then the combined hash
388  // value is just the number assigned to the lattice point. This way
389  // each unique input pair (x, y) gets a unique output hash code.
390  //
391  // We number lattice points by triangular numbers like this:
392  //
393  // X 0 1 2 3 4 5
394  // Y
395  // 0 0 2 5 9 14 20
396  // 1 1 4 8 13 19 26
397  // 2 3 7 12 18 25 33
398  // 3 6 11 17 24 32 41
399  // 4 10 16 23 31 40 50
400  // 5 15 22 30 39 49 60
401  //
402  // This takes a couple of additions and a multiplication, which is a bit
403  // more expensive than something like an exclusive or, but the quality
404  // improvement outweighs the added expense.
405  x += y;
406  return y + x * (x + 1) / 2;
407  }
408 
409  size_t _state = 0;
410  bool _didOne = false;
411 };
412 
504 class TfHash {
505 public:
508  template <class T>
509  auto operator()(T &&obj) const ->
510  decltype(Tf_HashImpl(std::declval<Tf_HashState &>(),
511  std::forward<T>(obj), 0), size_t()) {
512  Tf_HashState h;
513  Tf_HashImpl(h, std::forward<T>(obj), 0);
514  return h.GetCode();
515  }
516 
518  template <class... Args>
519  static size_t Combine(Args &&... args) {
520  Tf_HashState h;
521  _CombineImpl(h, std::forward<Args>(args)...);
522  return h.GetCode();
523  }
524 
525 private:
526  template <class HashState, class T, class... Args>
527  static void _CombineImpl(HashState &h, T &&obj, Args &&... rest) {
528  Tf_HashImpl(h, std::forward<T>(obj), 0);
529  _CombineImpl(h, std::forward<Args>(rest)...);
530  }
531 
532  template <class HashState>
533  static void _CombineImpl(HashState &h) {
534  // base case does nothing
535  }
536 };
537 
540  size_t operator()(const char* ptr) const;
541 };
542 
545  size_t operator()(const char* ptr) const;
546 };
547 
550  bool operator()(const char* lhs, const char* rhs) const;
551 };
552 
553 PXR_NAMESPACE_CLOSE_SCOPE
554 
555 #endif
A hash function object that hashes the address of a char pointer.
Definition: hash.h:539
A structure that wraps a char pointer, indicating intent that it should be hashed as a c-style null t...
Definition: hash.h:183
A user-extensible hashing mechanism for use with runtime hash tables.
Definition: hash.h:504
static size_t Combine(Args &&... args)
Produce a hash code by combining the hash codes of several objects.
Definition: hash.h:519
auto operator()(T &&obj) const -> decltype(Tf_HashImpl(std::declval< Tf_HashState & >(), std::forward< T >(obj), 0), size_t())
Produce a hash code for obj.
Definition: hash.h:509
A hash function object that hashes null-terminated c-string content.
Definition: hash.h:544
A function object that compares two c-strings for equality.
Definition: hash.h:549
size_t hash_value(const half h)
Overload hash_value for half.
Definition: half.h:45
TfCStrHashWrapper TfHashAsCStr(char const *cstr)
Indicate that a char pointer is intended to be hashed as a C-style null terminated string.
Definition: hash.h:197
A file containing basic constants and definitions.