一步一步剖析Dictionary實現原理

  • 2019 年 10 月 10 日
  • 筆記

  本文是對C#中Dictionary內部實現原理進行簡單的剖析。如有表述錯誤,歡迎指正。

  主要對照源碼來解析,目前對照源碼的版本是 .NET Framework 4.8,源碼地址

1. 關鍵的字段和Entry結構

        // Key fields of Dictionary<TKey, TValue> and the Entry slot type they store.
        struct Entry
        {
            public int hashCode;    // key's hash code & 0x7FFFFFFF
            public int next;        // index (into entries) of the next element in the chain; -1 for the last element
            public TKey key;
            public TValue value;
        }

        Entry[] entries;        // stores the key/value pairs
        int[] buckets;          // per bucket: index in entries of the newest element of that chain; the bucket is
                                // chosen by the modulo result. Example: if a pair is stored at entries[1] and
                                // hashCode % length == 2, then buckets[2] = 1
        int count = 0;          // number of entries slots handed out so far; Remove does not decrement it,
                                // so freed slots are still included
        int version;            // modification stamp, incremented on every change; guards against the
                                // collection being modified during enumeration
        IEqualityComparer<TKey> comparer;   // computes key hash codes and compares keys for equality
        int freeList;           // index of the most recently freed slot in entries (head of the free list)
        int freeCount;          // number of freed slots in entries

2. 添加鍵值(Add)

        // Adds a new key/value pair; throws if the key already exists (add == true).
        public void Add(TKey key, TValue value) {
            Insert(key, value, true);
        }

        // Shared implementation of Add and the indexer setter.
        //   key   - must not be null (ThrowArgumentNullException otherwise)
        //   value - value to store
        //   add   - true: an existing key is an error; false: an existing key is overwritten
        private void Insert(TKey key, TValue value, bool add) {

            if( key == null ) {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
            }

            if (buckets == null) Initialize(0);
            int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
            // Modulo picks the bucket.
            int targetBucket = hashCode % buckets.Length;

#if FEATURE_RANDOMIZED_STRING_HASHING
            int collisionCount = 0;
#endif

            // Walk the chain of the target bucket looking for an existing entry with this key.
            for (int i = buckets[targetBucket]; i >= 0; i = entries[i].next) {
                if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) {
                    if (add) {
                        ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_AddingDuplicate);
                    }
                    // Key already present: overwrite its value.
                    entries[i].value = value;
                    version++;
                    return;
                }

#if FEATURE_RANDOMIZED_STRING_HASHING
                collisionCount++;
#endif
            }
            int index;
            if (freeCount > 0) {
                // entries contains a freed slot: reuse the head of the free list.
                index = freeList;
                freeList = entries[index].next;
                freeCount--;
            }
            else {
                if (count == entries.Length)
                {
                    // Grow: the new capacity of entries and buckets is the smallest prime larger than
                    // count * 2 (chosen inside the parameterless Resize(), which is not shown here —
                    // NOTE(review): confirm against HashHelpers.ExpandPrime).
                    Resize();
                    // buckets.Length changed, so the bucket index must be recomputed.
                    targetBucket = hashCode % buckets.Length;
                }
                index = count;
                count++;
            }

            entries[index].hashCode = hashCode;
            entries[index].next = buckets[targetBucket];
            entries[index].key = key;
            entries[index].value = value;
            // The bucket stores the index of the chain's head, i.e. the entry inserted last.
            // Lookups therefore always start from the head; see FindEntry(TKey key).
            buckets[targetBucket] = index;
            version++;

#if FEATURE_RANDOMIZED_STRING_HASHING

#if FEATURE_CORECLR
            // In case we hit the collision threshold we'll need to switch to the comparer which is using randomized string hashing
            // in this case will be EqualityComparer<string>.Default.
            // Note, randomized string hashing is turned on by default on coreclr so EqualityComparer<string>.Default will
            // be using randomized string hashing

            if (collisionCount > HashHelpers.HashCollisionThreshold && comparer == NonRandomizedStringEqualityComparer.Default)
            {
                comparer = (IEqualityComparer<TKey>) EqualityComparer<string>.Default;
                Resize(entries.Length, true);
            }
#else
            if(collisionCount > HashHelpers.HashCollisionThreshold && HashHelpers.IsWellKnownEqualityComparer(comparer))
            {
                // The number of collisions (length of the chain walked) exceeded the threshold:
                // switch to a randomized comparer and rebuild the table with recomputed hash codes.
                // The size passed is entries.Length, so this is a rehash, not a capacity increase.
                comparer = (IEqualityComparer<TKey>) HashHelpers.GetRandomizedEqualityComparer(comparer);
                Resize(entries.Length, true);
            }
#endif // FEATURE_CORECLR

#endif

        }

        // ****************************** Usage example ******************************
        static void Foo()
        {
            var dicData = new Dictionary<int, int>();
            // Add key/value pairs.
            new List<int> { 1, 2, 4 }.ForEach(item => Add(item, dicData));
            new List<int> { 22, 29, 36, 20 }.ForEach(item => Add(item, dicData));
        }

        static void Add(int key, Dictionary<int, int> dicData)
        {
            dicData.Add(key, key);
        }

 

2.1 數組entries和buckets初始化

 

 

 

 2.2 添加鍵值{1,1},則

    hashCode = 1;
    targetBucket = hashCode % buckets.Length;         //targetBucket = 1
    next = buckets[targetBucket];                     //next = -1
    buckets[targetBucket] = index;                    //buckets[1] = 0

 

 

 2.3 添加鍵值{2,2},則

    hashCode = 2;
    targetBucket = hashCode % buckets.Length;         //targetBucket = 2
    next = buckets[targetBucket];                     //next = -1
    buckets[targetBucket] = index;                    //buckets[2] = 1

 

 

 2.4 添加鍵值{4,4},則

    hashCode = 4;
    targetBucket = hashCode % buckets.Length;         //targetBucket = 1
    next = buckets[targetBucket];                     //next = 0
    buckets[targetBucket] = index;                    //buckets[1] = 2

 

接下來將entries數組以單鏈表的形式呈現(即entries數組橫向);

 2.5 在繼續添加鍵值之前,需要擴容操作,因為entries數組長度為3且都已有元素。擴容後的容量為7(大於3 × 2的最小素數),並且需要對buckets和entries每個元素的next重新賦值;

            // Rebuilds buckets and entries at the given size.
            //   newSize           - new length of both arrays; must be >= entries.Length
            //   forceNewHashCodes - true to recompute every stored hash code with the current comparer
            //                       (used by Insert after it swaps the comparer on excessive collisions)
            private void Resize(int newSize, bool forceNewHashCodes) {
                Contract.Assert(newSize >= entries.Length);
                // Allocate the new buckets array and mark every bucket as empty (-1).
                int[] newBuckets = new int[newSize];
                for (int i = 0; i < newBuckets.Length; i++) newBuckets[i] = -1;
                // Copy the first count slots into the new entries array, keeping their indices.
                Entry[] newEntries = new Entry[newSize];
                Array.Copy(entries, 0, newEntries, 0, count);
                // For a collision-triggered rebuild, recompute the hash codes with the current comparer.
                // Slots whose hashCode is -1 are left untouched.
                if(forceNewHashCodes) {
                    for (int i = 0; i < count; i++) {
                        if(newEntries[i].hashCode != -1) {
                            newEntries[i].hashCode = (comparer.GetHashCode(newEntries[i].key) & 0x7FFFFFFF);
                        }
                    }
                }
                // Rebuild the chains: slots with a negative hashCode are skipped, every other slot is
                // pushed onto the head of the chain of its new bucket.
                for (int i = 0; i < count; i++) {
                    if (newEntries[i].hashCode >= 0) {
                        // Modulo with the new size gives the new bucket; relink next and the bucket head.
                        int bucket = newEntries[i].hashCode % newSize;
                        newEntries[i].next = newBuckets[bucket];
                        newBuckets[bucket] = i;
                    }
                }
                buckets = newBuckets;
                entries = newEntries;
            }

 2.6 繼續添加鍵值{22,22},{29,29},{36,36},{20,20},添加完後其內部存儲結果如下

 3. 取Key值(dic[22])

     // Indexer: the getter looks the key up and throws KeyNotFoundException (via ThrowHelper)
     // when it is absent; the setter inserts or overwrites.
     public TValue this[TKey key] {
         get {
             // Index in entries of the slot holding this key, or -1.
             int i = FindEntry(key);
             if (i >= 0) return entries[i].value;
             ThrowHelper.ThrowKeyNotFoundException();
             return default(TValue);
         }
         set {
             // add == false: an existing key gets its value overwritten.
             Insert(key, value, false);
         }
     }

     // Returns the index in entries of the slot holding key, or -1 when the key is not found
     // (including when buckets has not been allocated yet). Throws on a null key.
     private int FindEntry(TKey key) {
         if( key == null) {
             ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
         }

         if (buckets != null) {
             int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
             // Walk the chain of the key's bucket, starting from its head.
             for (int i = buckets[hashCode % buckets.Length]; i >= 0; i = entries[i].next) {
                 if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) return i;
             }
         }
         return -1;
     }

     // ****************************** Usage example ******************************
     static void Foo()
     {
         // ...... (setup omitted)
         // Look up Key = 22.
         var val =dicData[22];
     }

簡化取Key對應值的代碼

    // Simplified lookup; the values in the trailing comments are for key = 22.
    var hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;   // 22
    var targetBucket = hashCode % buckets.Length;            // modulo result: 1
    var i = buckets[targetBucket];                           // index of the chain's head element: buckets[1] = 5
    // Walk the singly linked chain.
    for (; i >= 0; i = entries[i].next) {
        if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) return i;
    }

 4. 移除鍵值(Remove)

        // Removes the entry with the given key.
        // Returns true when an entry was removed, false when the key was not found
        // (or buckets has not been allocated yet). Throws on a null key.
        public bool Remove(TKey key) {
            if(key == null) {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
            }

            if (buckets != null) {
                int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
                int bucket = hashCode % buckets.Length;
                // Index of the previous element in the chain; -1 while i is still the chain's head.
                int last = -1;
                // Find the entry, unlink it from its chain, then record the freed slot
                // in freeList and increase freeCount.
                for (int i = buckets[bucket]; i >= 0; last = i, i = entries[i].next) {
                    if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) {
                        if (last < 0) {
                            // Removing the head: the bucket now points at the second element.
                            buckets[bucket] = entries[i].next;
                        }
                        else {
                            // Removing from the middle or end: bypass the removed element.
                            entries[last].next = entries[i].next;
                        }
                        entries[i].hashCode = -1;
                        // Build the free list: the freed slot links to the previous free-list head.
                        entries[i].next = freeList;
                        entries[i].key = default(TKey);
                        entries[i].value = default(TValue);
                        // Remember the freed index so that the next insert reuses this slot,
                        // which avoids wasting space in entries.
                        freeList = i;
                        // One more freed slot.
                        freeCount++;
                        version++;
                        return true;
                    }
                }
            }
            return false;
        }

        // ****************************** Usage example ******************************
        static void Foo()
        {
            // ...... (setup omitted)
            // Remove Key = 22, then Key = 36.
            new List<int> { 22, 36 }.ForEach(item => dicData.Remove(item));
        }

4.1 移除Key=22後,freeList = 3, freeCount = 1,

 4.2 移除Key=36後,freeList = 5, freeCount = 2, 

 

 

 5. 再插入鍵值

如上圖,當移除掉{36,36}後,會發現又誕生一個含有兩個元素的“新鏈表”(上圖灰色框)。這個作用就是為了插入新鍵值時,按照“新鏈表”記錄的索引順序插入到entries數組中。
例:添加鍵值{22,22},{25,25},此時freeList = 5,freeCount = 2;
  1. 給entries[5]賦值,freeList = 3, freeCount = 1;
  2. 給entries[3]賦值,freeList = -1, freeCount = 0;

 

 希望此文能夠讓你對於Dictionary內部實現有所認識。