00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef UT_HASH_H
00025 #define UT_HASH_H
00026
00027 #include <string.h>
00028
00029
00030
00031
00032
00033 #ifndef UT_TYPES_H
00034 #include "ut_types.h"
00035 #endif
00036
00037 #ifndef UTVECTOR_H
00038 #include "ut_vector.h"
00039 #endif
00040
00041 #include "ut_debugmsg.h"
00042 #include "ut_string_class.h"
00043
00044 #if _MSC_VER >= 1310
00045
00046 #pragma warning(disable: 4292)
00047 #endif
00048
00049
00050 template <class T> class hash_slot;
00051
00052 template <class T> class UT_GenericStringMap;
00053
00054 template <class T> class UT_GenericStringMap
00055 {
00056 public:
00057 UT_GenericStringMap(size_t expected_cardinality = 11);
00058 virtual ~UT_GenericStringMap();
00059
00060
00061 bool insert(const char* key, T value);
00062 bool insert(const UT_String & key, T value);
00063
00064 void set (const char* key, T val);
00065 void set (const UT_String & key, T val);
00066
00067
00068 T pick(const char* key) const;
00069 T pick(const UT_String & key) const;
00070
00071
00072 bool contains(const char* key, T val) const;
00073 bool contains(const UT_String & key, T val) const;
00074
00075
00076 void remove(const char* key, T );
00077 void remove(const UT_String & key, T );
00078 void clear();
00079
00080
00081
00082 const gchar ** list ();
00083
00084 UT_GenericVector<T>* enumerate(bool strip_null_values = true) const;
00085 UT_GenericVector<const UT_String*>* keys(bool strip_null_values = true) const;
00086
00087
00088 inline size_t size() const { return n_keys; }
00089
00090 class UT_Cursor
00091 {
00092 friend class UT_GenericStringMap<T>;
00093
00094 public:
00095 UT_Cursor(const UT_GenericStringMap<T> * owner)
00096 : m_d(owner), m_index(-1)
00097 {
00098
00099 }
00100
00101 ~UT_Cursor() { }
00102
00103
00104 inline const UT_String &key()
00105 {return m_d->_key(*this); }
00106 inline void make_deleted()
00107 {m_d->_make_deleted(*this); }
00108 inline const T first()
00109 { return m_d->_first(*this); }
00110 inline const T next()
00111 { return m_d->_next(*this); }
00112 inline const T prev()
00113 { return m_d->_prev(*this); }
00114 inline bool is_valid()
00115 { return (m_index != -1); }
00116
00117 private:
00118
00119 inline void _set_index(UT_sint32 i)
00120 { m_index = i; }
00121 inline UT_sint32 _get_index()
00122 { return m_index; }
00123
00124 const UT_GenericStringMap<T>* m_d;
00125 UT_sint32 m_index;
00126 };
00127
00128 friend class UT_Cursor;
00129
00130
00131 void purgeData(void)
00132 {
00133 UT_Cursor hc1(this);
00134 for ( T hval1 = hc1.first(); hc1.is_valid(); hval1 = hc1.next() ) {
00135 if (hval1) {
00136 hc1.make_deleted();
00137 delete hval1;
00138 }
00139 }
00140 };
00141
00142
00143 void freeData(void)
00144 {
00145 UT_Cursor hc1(this);
00146 for ( T hval1 = hc1.first(); hc1.is_valid(); hval1 = hc1.next() ) {
00147 if (hval1) {
00148 hc1.make_deleted();
00149 g_free((gpointer)(hval1));
00150 }
00151 }
00152 }
00153
00154 private:
00155 UT_GenericStringMap(const UT_GenericStringMap<T>&);
00156 void operator=(const UT_GenericStringMap<T>&);
00157
00158
00159 enum SM_search_type
00160 {
00161 SM_INSERT,
00162 SM_LOOKUP,
00163 SM_REORG
00164 };
00165
00166 void reorg(size_t slots_to_allocate);
00167 void grow();
00168
00169 void assign_slots(hash_slot<T>* p, size_t old_num_slots);
00170
00171 static size_t compute_reorg_threshold(size_t nslots);
00172
00173 bool too_full() const
00174 { return (n_keys + n_deleted) >= reorg_threshold; }
00175
00176 bool too_many_deleted() const
00177 { return n_deleted > (reorg_threshold / 4); }
00178
00179 bool exceeds_n_delete_threshold() const
00180 { return n_deleted > (reorg_threshold / 2); }
00181
00182 hash_slot<T>* find_slot(const UT_String& k,
00183 SM_search_type search_type,
00184 size_t& slot,
00185 bool& key_found,
00186 size_t& hashval,
00187 const void* v,
00188 bool* v_found,
00189 void* vi,
00190 size_t hashval_in) const;
00191
00192 hash_slot<T>* find_slot(const char * k,
00193 SM_search_type search_type,
00194 size_t& slot,
00195 bool& key_found,
00196 size_t& hashval,
00197 const void* v,
00198 bool* v_found,
00199 void* vi,
00200 size_t hashval_in) const;
00201
00202
00203 const T _first(UT_Cursor& c) const;
00204 const T _next(UT_Cursor& c) const;
00205 const T _prev(UT_Cursor& c) const;
00206 const UT_String& _key(UT_Cursor& c) const;
00207 void _make_deleted(UT_Cursor& c) const;
00208
00209
00210 hash_slot<T>* m_pMapping;
00211
00212 size_t n_keys;
00213 size_t n_deleted;
00214 size_t m_nSlots;
00215 size_t reorg_threshold;
00216 size_t flags;
00217
00218 gchar ** m_list;
00219 };
00220
00221 #if 0 //def _MSC_VER // have to intialise the templates in order to have class exported
00222
00223 struct _dataItemPair;
00224 template class ABI_EXPORT UT_GenericStringMap<struct _dataItemPair*>;
00225
00226 class UT_UTF8String;
00227 template class ABI_EXPORT UT_GenericStringMap<UT_UTF8String *>;
00228
00229 class PD_Style;
00230 template class ABI_EXPORT UT_GenericStringMap<PD_Style *>;
00231
00232 class GR_Font;
00233 template class ABI_EXPORT UT_GenericStringMap<GR_Font *>;
00234
00235 template class ABI_EXPORT UT_GenericStringMap<char *>;
00236
00237
00238 #endif
00239
00240
00241
00242 class ABI_EXPORT UT_StringPtrMap : public UT_GenericStringMap<void const *> {
00243 public:
00244 UT_StringPtrMap(size_t expected_cardinality = 11)
00245 : UT_GenericStringMap<void const *>(expected_cardinality)
00246 {}
00247 };
00248
00249
00250
00251
00252 ABI_EXPORT UT_uint32 _Recommended_hash_size(UT_uint32 size);
00253
00254
00255
00256 class ABI_EXPORT key_wrapper
00257 {
00258 public:
00259 key_wrapper()
00260 : m_hashval(0) { }
00261
00262 void die()
00263 { m_val.clear(); }
00264
00265 bool eq(const UT_String &key) const
00266 {
00267 return (m_val == key);
00268 }
00269
00270 bool eq(const char *key) const
00271 {
00272 return (!strcmp(m_val.c_str(),key));
00273 }
00274
00275 void operator=(const UT_String &k)
00276 { m_val = k; }
00277
00278 UT_uint32 hashval() const
00279 { return m_hashval; }
00280 void set_hashval(UT_uint32 h)
00281 { m_hashval = h; }
00282
00283 UT_String &value(void)
00284 {return m_val;}
00285
00286 void operator=(const key_wrapper& rhs)
00287 { m_val = rhs.m_val; m_hashval = rhs.m_hashval; }
00288
00289 static UT_uint32 compute_hash(const UT_String &key)
00290 {
00291 return hashcode(key);
00292 }
00293
00294 static UT_uint32 compute_hash(const char *key)
00295 {
00296 return hashcode(key);
00297 }
00298
00299 private:
00300 UT_String m_val;
00301 UT_uint32 m_hashval;
00302 };
00303
00304
00305
00306
00307
00308 template <class T> class hash_slot
00309 {
00310 public:
00311 hash_slot()
00312 : m_value(0) { }
00313
00314 void make_deleted()
00315 {
00316
00317 m_value = reinterpret_cast<T>(this);
00318 m_key.die();
00319 }
00320 void make_empty()
00321 { m_value = 0; }
00322
00323 const T value() const
00324 { return m_value; }
00325
00326 void insert(const T v, const UT_String &k, UT_uint32 h)
00327 {
00328 m_value = v;
00329 m_key = k;
00330 m_key.set_hashval(h);
00331 }
00332
00333 void assign(hash_slot<T>* s)
00334 {
00335 m_value = s->value();
00336 m_key = s->m_key;
00337 }
00338
00339 bool empty() const
00340 { return (m_value == 0); }
00341
00342 bool deleted() const
00343 {
00344 return static_cast<const void*>(this) == m_value;
00345 }
00346
00347 bool key_eq(const UT_String &test, size_t h) const
00348 {
00349 #if 1
00350 UT_UNUSED(h);
00351 return m_key.eq(test);
00352 #else
00353 return m_key.hashval() == h;
00354 #endif
00355 }
00356
00357 bool key_eq(const char *test, size_t h) const
00358 {
00359 #if 1
00360 UT_UNUSED(h);
00361 return m_key.eq(test);
00362 #else
00363 return m_key.hashval() == h;
00364 #endif
00365 }
00366
00367 T m_value;
00368 key_wrapper m_key;
00369 };
00370
00376 template <class T>
00377 UT_GenericStringMap<T>::UT_GenericStringMap(size_t expected_cardinality)
00378 : n_keys(0),
00379 n_deleted(0),
00380 m_nSlots(_Recommended_hash_size(expected_cardinality)),
00381 reorg_threshold(compute_reorg_threshold(m_nSlots)),
00382 flags(0),
00383 m_list(0)
00384 {
00385 m_pMapping = new hash_slot<T>[m_nSlots];
00386 }
00387
00388
00389 template <class T>
00390 UT_GenericStringMap<T>::~UT_GenericStringMap()
00391 {
00392 DELETEPV(m_pMapping);
00393 FREEP(m_list);
00394 }
00395
00396
00397
00398
00399 template <class T>
00400 const gchar ** UT_GenericStringMap<T>::list()
00401 {
00402 if (!m_list)
00403 {
00404 m_list = reinterpret_cast<gchar **>(g_try_malloc (2 * (n_keys + 1) * sizeof (gchar *)));
00405 if (m_list == 0)
00406 return 0;
00407
00408 UT_uint32 index = 0;
00409
00410 UT_Cursor c(this);
00411
00412 for (const gchar * value = (gchar*)(c.first ());
00413 c.is_valid ();
00414 value = (gchar*)(c.next ()))
00415 {
00416 const char * key = c.key().c_str ();
00417
00418 if (!key || !value)
00419 continue;
00420
00421 m_list[index++] = static_cast<gchar *>(const_cast<char *>(key));
00422 m_list[index++] = static_cast<gchar *>(const_cast<char *>(value));
00423 }
00424 m_list[index++] = NULL;
00425 m_list[index ] = NULL;
00426 }
00427 return const_cast<const gchar **>(m_list);
00428 }
00429
00434 template <class T>
00435 T UT_GenericStringMap<T>::pick(const char* k) const
00436 {
00437 hash_slot<T>* sl = 0;
00438 bool key_found = false;
00439 size_t slot;
00440 size_t hashval;
00441
00442 sl = find_slot(k, SM_LOOKUP, slot, key_found, hashval, 0, 0, 0, 0);
00443 return key_found ? sl->value() : 0;
00444 }
00445
00446 template <class T>
00447 T UT_GenericStringMap<T>::pick(const UT_String & k) const
00448 {
00449 return pick (k.c_str());
00450 }
00451
00457 template <class T>
00458 bool UT_GenericStringMap<T>::contains(const char* k, T v) const
00459 {
00460 UT_String aKey(k);
00461 return contains (aKey, v);
00462 }
00463
00464 template <class T>
00465 bool UT_GenericStringMap<T>::contains(const UT_String& k, T v) const
00466 {
00467
00468 bool key_found = false;
00469 bool v_found = false;
00470 size_t slot = 0;
00471 size_t hashval = 0;
00472
00473
00474 find_slot (k, SM_LOOKUP, slot, key_found,
00475 hashval, v, &v_found, 0, 0);
00476 return v_found;
00477 }
00478
00479
00483 template <class T>
00484 bool UT_GenericStringMap<T>::insert(const char* key, T value)
00485 {
00486 UT_String aKey(key);
00487 return insert (aKey, value);
00488 }
00489
00490 template <class T>
00491 bool UT_GenericStringMap<T>::insert(const UT_String& key, T value)
00492 {
00493 FREEP(m_list);
00494
00495 size_t slot = 0;
00496 bool key_found = false;
00497 size_t hashval = 0;
00498
00499 hash_slot<T>* sl = find_slot(key, SM_INSERT, slot, key_found,
00500 hashval, 0, 0, 0, 0);
00501
00502 if(key_found)
00503 return false;
00504
00505 sl->insert(value, key, hashval);
00506 ++n_keys;
00507
00508 if (too_full())
00509 {
00510 if (too_many_deleted())
00511 {
00512 reorg(m_nSlots);
00513 }
00514 else
00515 {
00516 grow();
00517 }
00518 }
00519
00520 return true;
00521 }
00522
00527 template <class T>
00528 void UT_GenericStringMap<T>::set(const char* key, T value)
00529 {
00530 UT_String aKey(key);
00531 set (aKey, value);
00532 }
00533
00534 template <class T>
00535 void UT_GenericStringMap<T>::set(const UT_String& key, T value)
00536 {
00537 FREEP(m_list);
00538
00539 size_t slot = 0;
00540 bool key_found = false;
00541 size_t hashval = 0;
00542
00543 hash_slot<T>* sl = find_slot(key, SM_LOOKUP, slot, key_found,
00544 hashval, 0, 0, 0, 0);
00545
00546 if (!sl || !key_found)
00547 {
00548 insert(key, value);
00549 return;
00550
00551 }
00552
00553 sl->insert(value, key, hashval);
00554 }
00555
00560 template <class T>
00561 UT_GenericVector<T>* UT_GenericStringMap<T>::enumerate (bool strip_null_values) const
00562 {
00563 UT_GenericVector<T> * pVec = new UT_GenericVector<T>(size());
00564
00565 UT_Cursor cursor(this);
00566
00567 T val = NULL;
00568
00569 for (val = cursor.first(); cursor.is_valid(); val = cursor.next ())
00570 {
00571
00572
00573 if (!strip_null_values || val)
00574 {
00575 pVec->addItem (val);
00576 }
00577 }
00578
00579 return pVec;
00580 }
00581
00586 template <class T>
00587 UT_GenericVector<const UT_String*>* UT_GenericStringMap<T>::keys (bool strip_null_values) const
00588 {
00589 UT_GenericVector<const UT_String*>* pVec = new UT_GenericVector<const UT_String*>(size());
00590
00591 UT_Cursor cursor(this);
00592
00593 T val = NULL;
00594
00595 for (val = cursor.first(); cursor.is_valid(); val = cursor.next ())
00596 {
00597
00598
00599 if (!strip_null_values || val)
00600 {
00601 pVec->addItem (&cursor.key());
00602 }
00603 }
00604
00605 return pVec;
00606 }
00607
00608
00612 template <class T>
00613 void UT_GenericStringMap<T>::remove(const char* key, T)
00614 {
00615 UT_String aKey(key);
00616 remove (aKey, 0);
00617 }
00618
00619 template <class T>
00620 void UT_GenericStringMap<T>::remove(const UT_String& key, T)
00621 {
00622 FREEP(m_list);
00623
00624 size_t slot = 0, hashval;
00625 bool bFound = false;
00626 hash_slot<T>* sl = find_slot(key, SM_LOOKUP, slot, bFound,
00627 hashval, 0, 0, 0, 0);
00628
00629 if (bFound)
00630 {
00631 sl->make_deleted();
00632 --n_keys;
00633 ++n_deleted;
00634 if (m_nSlots > 11 && m_nSlots / 4 >= n_keys)
00635 {
00636 reorg(_Recommended_hash_size(m_nSlots/2));
00637 }
00638 }
00639 }
00640
00644 template <class T>
00645 void UT_GenericStringMap<T>::clear()
00646 {
00647 FREEP(m_list);
00648
00649 hash_slot<T>* the_slots = m_pMapping;
00650 for (size_t x=0; x < m_nSlots; x++)
00651 {
00652 hash_slot<T>& this_slot = the_slots[x];
00653 if (!this_slot.empty())
00654 {
00655 if (!this_slot.deleted())
00656 {
00657 this_slot.make_deleted();
00658 }
00659 this_slot.make_empty();
00660 }
00661 }
00662 n_keys = 0;
00663 n_deleted = 0;
00664 }
00665
00666
00667
00668
00669
00670 template <class T>
00671 void UT_GenericStringMap<T>::assign_slots(hash_slot<T>* p, size_t old_num_slot)
00672 {
00673 size_t target_slot = 0;
00674
00675 for (size_t slot_num=0; slot_num < old_num_slot; ++slot_num, ++p)
00676 {
00677 if (!p->empty() && !p->deleted())
00678 {
00679 bool kf = false;
00680
00681 size_t hv;
00682 hash_slot<T>* sl = find_slot(p->m_key.value(),
00683 SM_REORG,
00684 target_slot,
00685 kf,
00686 hv,
00687 0,
00688 0,
00689 NULL,
00690 p->m_key.hashval());
00691 sl->assign(p);
00692 }
00693 }
00694 }
00695
00696 template <class T>
00697 size_t UT_GenericStringMap<T>::compute_reorg_threshold(size_t nSlots)
00698 {
00699 return nSlots * 7 / 10;
00700 }
00701
00702
00703 template <class T> hash_slot<T>*
00704 UT_GenericStringMap<T>::find_slot(const UT_String& k,
00705 SM_search_type search_type,
00706 size_t& slot,
00707 bool& key_found,
00708 size_t& hashval,
00709 const void* v,
00710 bool* v_found,
00711 void* vi,
00712 size_t hashval_in) const
00713 {
00714 return find_slot( k.c_str(), search_type, slot, key_found, hashval, v, v_found, vi, hashval_in);
00715 }
00716
00717 template <class T> hash_slot<T>*
00718 UT_GenericStringMap<T>::find_slot(const char *k,
00719 SM_search_type search_type,
00720 size_t& slot,
00721 bool& key_found,
00722 size_t& hashval,
00723 const void* v,
00724 bool* v_found,
00725 void* ,
00726 size_t hashval_in) const
00727 {
00728 if ( m_nSlots == 0 )
00729 {
00730 key_found = false ; return NULL ;
00731 }
00732
00733 hashval = (hashval_in ? hashval_in : key_wrapper::compute_hash(k));
00734 int nSlot = hashval % m_nSlots;
00735
00736 xxx_UT_DEBUGMSG(("DOM: hashval for \"%s\" is %d (#%dth slot)\n", k, hashval, nSlot));
00737
00738 hash_slot<T>* sl = &m_pMapping[nSlot];
00739
00740 if (sl->empty())
00741 {
00742
00743 xxx_UT_DEBUGMSG(("DOM: empty slot\n"));
00744
00745 slot = nSlot;
00746 key_found = false;
00747 return sl;
00748 }
00749 else
00750 {
00751 if (search_type != SM_REORG &&
00752 !sl->deleted() &&
00753 sl->key_eq(k, hashval))
00754 {
00755 slot = nSlot;
00756 key_found = true;
00757
00758 if (v_found)
00759 {
00760
00761
00762
00763 if (v)
00764 {
00765 *v_found = (sl->value() == v);
00766 } else {
00767 *v_found = true;
00768 }
00769 }
00770
00771 xxx_UT_DEBUGMSG(("DOM: found something #1\n"));
00772
00773 return sl;
00774 }
00775 }
00776
00777 int delta = (nSlot ? (m_nSlots - nSlot) : 1);
00778 hash_slot<T>* tmp_sl = sl;
00779 sl = 0;
00780 size_t s = 0;
00781 key_found = false;
00782
00783 while (1)
00784 {
00785 nSlot -= delta;
00786 if (nSlot < 0)
00787 {
00788 nSlot += m_nSlots;
00789 tmp_sl += (m_nSlots - delta);
00790 }
00791 else
00792 {
00793 tmp_sl -= delta;
00794 }
00795
00796 if (tmp_sl->empty())
00797 {
00798 if (!s)
00799 {
00800 s = nSlot;
00801 sl = tmp_sl;
00802 }
00803 break;
00804
00805 }
00806
00807 if (tmp_sl->deleted())
00808 {
00809 if (!s)
00810 {
00811 s = nSlot;
00812 sl = tmp_sl;
00813 }
00814 }
00815 else if (search_type != SM_REORG && tmp_sl->key_eq(k, hashval))
00816 {
00817 s = nSlot;
00818 sl = tmp_sl;
00819 key_found = true;
00820
00821 if (v_found)
00822 {
00823 if (v)
00824 {
00825 *v_found = (sl->value() == v);
00826 } else {
00827 *v_found = true;
00828 }
00829 }
00830 break;
00831 }
00832 }
00833
00834 slot = s;
00835 return sl;
00836 }
00837
00838
00839 template <class T>
00840 void UT_GenericStringMap<T>::grow()
00841 {
00842 size_t slots_to_allocate = ::_Recommended_hash_size(m_nSlots / 2 + m_nSlots);
00843 reorg(slots_to_allocate);
00844 }
00845
00846
00847 template <class T>
00848 void UT_GenericStringMap<T>::reorg(size_t slots_to_allocate)
00849 {
00850 hash_slot<T>* pOld = m_pMapping;
00851
00852 if (slots_to_allocate < 11)
00853 {
00854 slots_to_allocate = 11;
00855 }
00856
00857 m_pMapping = new hash_slot<T>[slots_to_allocate];
00858
00859 const size_t old_num_slot = m_nSlots;
00860
00861 m_nSlots = slots_to_allocate;
00862 reorg_threshold = compute_reorg_threshold(m_nSlots);
00863
00864 assign_slots(pOld, old_num_slot);
00865 DELETEPV(pOld);
00866
00867 n_deleted = 0;
00868 }
00869
00870
00871 template <class T> const T
00872 UT_GenericStringMap<T>::_first(UT_Cursor& c) const
00873 {
00874 const hash_slot<T>* map = m_pMapping;
00875 size_t x;
00876 for (x=0; x < m_nSlots; ++x)
00877 {
00878 if (!map[x].empty() && !map[x].deleted())
00879 {
00880 break;
00881 }
00882 }
00883 if (x < m_nSlots)
00884 {
00885 c._set_index(x);
00886 return map[x].value();
00887 }
00888
00889 c._set_index(-1);
00890 return 0;
00891 }
00892
00893 template <class T>
00894 const UT_String& UT_GenericStringMap<T>::_key(UT_Cursor& c) const
00895 {
00896 hash_slot<T> & slot = m_pMapping[c._get_index()];
00897
00898
00899
00900
00901
00902
00903
00904
00905 UT_ASSERT_HARMLESS(!slot.empty() && !slot.deleted());
00906
00907 return slot.m_key.value();
00908 }
00909
00910 template <class T>
00911 void UT_GenericStringMap<T>::_make_deleted(UT_Cursor& c) const
00912 {
00913 hash_slot<T> & slot = m_pMapping[c._get_index()];
00914
00915 if (!slot.empty() && !slot.deleted())
00916 {
00917 slot.make_deleted();
00918 }
00919 }
00920
00921 template <class T> const T
00922 UT_GenericStringMap<T>::_next(UT_Cursor& c) const
00923 {
00924 const hash_slot<T>* map = m_pMapping;
00925 size_t x;
00926 for (x = c._get_index() + 1; x < m_nSlots; ++x)
00927 {
00928 if (!map[x].empty() && !map[x].deleted())
00929 {
00930 break;
00931 }
00932 }
00933 if (x < m_nSlots)
00934 {
00935 c._set_index(x);
00936 return map[x].value();
00937 }
00938
00939 c._set_index(-1);
00940 return 0;
00941 }
00942
00943
00944 template <class T> const T
00945 UT_GenericStringMap<T>::_prev(UT_Cursor& c) const
00946 {
00947 const hash_slot<T>* map = m_pMapping;
00948 UT_sint32 x;
00949 for (x = c._get_index() - 1; x >= 0; --x)
00950 {
00951 if (!map[x].empty() && !map[x].deleted())
00952 {
00953 break;
00954 }
00955 }
00956 if (x >= 0)
00957 {
00958 c._set_index(x);
00959 return map[x].value();
00960 }
00961
00962 c._set_index(-1);
00963 return 0;
00964 }
00965
00966
00967 #endif