2.3.3 小对象

发布于 2024-10-12 19:15:56 字数 11897 浏览 0 评论 0 收藏 0

在 mcache 里使用数组存储不同类别（sizeclass）的 span 内存块。

// mcache.go

// Per-thread (in Go, per-P) cache for small objects.
type mcache struct {
	// spans to allocate from, indexed by spanClass
	alloc [numSpanClasses]*mspan 
}

// malloc.go

// Allocate an object of size bytes.
// Small objects are allocated from the per-P cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.

func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {

	c := getMCache(mp)

    var span *mspan
	var x unsafe.Pointer
	noscan := typ == nil || typ.ptrdata == 0
    
	if size <= maxSmallSize {
		if noscan && size < maxTinySize {
			// tiny ...
		} else {
            
            // 等级。
			var sizeclass uint8
			if size <= smallSizeMax-8 {
				sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)]
			} else {
				sizeclass = size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)]
			}
            
            // 索引。
			size = uintptr(class_to_size[sizeclass])
			spc := makeSpanClass(sizeclass, noscan)
            
            // 从 mcache 提取 mspan 块。
			span = c.alloc[spc]
            
            // 从 span 提取 object 内存。
			v := nextFreeFast(span)
			if v == 0 {
				v, span, shouldhelpgc = c.nextFree(spc)
			}
			x = unsafe.Pointer(v)
		}
	} else {
        // large ...
	}

	return x
}

索引

数组（mcache.alloc）长度是类别（sizeclass）的 2 倍。

每种类别按是否包含指针分 scan 和 noscan 两种，如此有助于 GC 优化。

// mheap.go

numSpanClasses = _NumSizeClasses << 1    // 68 << 1

所对应数组索引按以下算法计算：

noscan    := typ == nil || typ.ptrdata == 0
spanclass := (sizeclass << 1) | noscan

// mheap.go

func makeSpanClass(sizeclass uint8, noscan bool) spanClass {
	return spanClass(sizeclass<<1) | spanClass(bool2int(noscan))
}

例如 sizeclass = 3，那么实际索引：

scan : (0b11 << 1) | 0 == 0b110 == 6
noscan : (0b11 << 1) | 1 == 0b111 == 7

提取

每个 mspan 仅为一种 sizeclass 服务，所以内部 object 等长。

// mheap.go

type mspan struct {

	startAddr uintptr // address of first byte of span aka s.base()
	npages    uintptr // number of pages in span
	nelems    uintptr // number of object in the span.

	freeindex  uintptr
	allocCache uint64

	allocBits  *gcBits  // 分配标记位图。
	gcmarkBits *gcBits  // 回收标记位图。

    allocCount  uint16        // number of allocated objects
	spanclass   spanClass     // size class and noscan (uint8)
	state       mSpanStateBox // mSpanInUse etc
}

依据 allocBits 位图，将整个 mspan 内存当作 object 数组对待，通过索引计算偏移量。

// malloc.go

// nextFree returns the next free object from the cached span if one is available.
// Otherwise it refills the cache with a span with an available object and
// returns that object along with a flag indicating that this was a heavy
// weight allocation. 

func (c *mcache) nextFree(spc spanClass) (v gclinkptr, s *mspan, shouldhelpgc bool) {

    // 获取下一可用 object 索引。
    s = c.alloc[spc]
	freeIndex := s.nextFreeIndex()
    
    // 没有可用空间。
	if freeIndex == s.nelems {
		// 当前 mspan 已全部分配，没有剩余空间。
        // 从 mcentral 获取新 mspan 放入 alloc 数组.
		c.refill(spc)
		s = c.alloc[spc]

        // 重试。
		freeIndex = s.nextFreeIndex()
	}

    // 通过索引和起始地址，计算实际地址。
	v = gclinkptr(freeIndex*s.elemsize + s.base())
	return
}

// mbitmap.go

// nextFreeIndex returns the index of the next free object in s at
// or after s.freeindex.

func (s *mspan) nextFreeIndex() uintptr {

    sfreeindex := s.freeindex
    snelems := s.nelems
  
    // 已抵达尾部，没有可用空间。
    if sfreeindex == snelems {
        return sfreeindex
    }

    // 从缓存查找。
    aCache := s.allocCache
    bitIndex := sys.Ctz64(aCache)
  
    // 整个缓存无可用位，刷新缓存。（如刷新后依然无可用，循环刷新）
    for bitIndex == 64 {
    
        // 计算填充位置，或超出当前 span 范围。
        sfreeindex = (sfreeindex + 64) &^ (64 - 1)
        if sfreeindex >= snelems {
            s.freeindex = snelems
            return snelems
        }
    
        whichByte := sfreeindex / 8

        // 填充缓存，重试。
        s.refillAllocCache(whichByte)
        aCache = s.allocCache
        bitIndex = sys.Ctz64(aCache)
    }
  
    // 基于位图，计算索引。
    result := sfreeindex + uintptr(bitIndex)
    if result >= snelems {
        s.freeindex = snelems
        return snelems
    }

    // 右移缓存，更新 freeindex。
    s.allocCache >>= uint(bitIndex + 1)
    sfreeindex = result + 1
    s.freeindex = sfreeindex
  
    return result
}

算法 nextFreeIndex 的关键是 freeindex 和 allocCache 工作方式。

                       freeindex           
                           |           
                           v           
              +---//---+---+---+---+---+-----//----+
    allocBits |  ...   | 1 | 0 | . | 0 |     ...   |
              +---//---+---+---+---+---+-----//----+
                           |           |
                           |           |
                           +---+---+---+
                allocCache | 1 | . | 1 |
                           +---+---+---+

freeindex: 存储上次扫描位置，本次扫描以此为起点。
allocCache: 缓存 freeindex 后的部分数据，以提高扫描效率。

作为缓存的 allocCache 从 freeindex 起，缓存 allocBits 64 个二进制标记位。
相比字节位图，作为一个 uint64 的缓存，可用寄存器直接存储，有更好访问效率。
从 allocCache 检索到可用位置，加上 freeindex 就是在 allocBits 位图里的实际索引。
注意：
allocBits 由 gcmarkBits 交换而来，内部存储是垃圾回收标记结果，故 0 才是可用位置。
调用 refillAllocCache 填充 allocCache 时，进行反转，1 表示可用。
因为 allocCache 通过统计低位 0 数量找到第一可用位 1。
找到可用位后，allocCache 右移，剔除掉已扫描过的低位。
如此，下次可继续通过统计低位 0 查找可用位。
右移同时，须更新 freeindex。

// internal/sys/intrinsics.go 

// Ctz64 counts trailing (low-order) zeroes,
// and if all are zero, then 64.

func Ctz64(x uint64) int {
	x &= -x                       // isolate low-order bit
	y := x * deBruijn64ctz >> 58  // extract part of deBruijn sequence
	i := int(deBruijnIdx64ctz[y]) // convert to bit index
	z := int((x - 1) >> 57 & 64)  // adjustment if zero
	return i + z
}

示例：

               H           L
  allocCache:  11...11000100     // 统计低位 0 数量，找到第一个 1。

1. 统计（Ctz64）低位（L）二进制 0 数量，确定第一个可用位 bitIndex = 2。
2. 计算基于位图的实际索引 result = freeindex + bitIndex。
3. 右移 allocCache，剔除掉已扫描过的低位。allocCache >> (bitIndex + 1)，含本次。
4. 更新 freeindex，否则下次计算偏移位置错误。

               H           L
  allocCache:  00011...11000     // 右移剔除掉低位 100 后的结果。

正因为右移操作，才需要在填充缓存时反转。毕竟 0000 怎么右移都没有变化，1111 就成了 0111。

优先使用快速版本，仅检查缓存。没有无关调用，不填充，不扩容，一切为了性能。

// malloc.go

// nextFreeFast returns the next free object if one is quickly available.
func nextFreeFast(s *mspan) gclinkptr {
    
    // 直接查找缓存。
	theBit := sys.Ctz64(s.allocCache) 
	if theBit < 64 {
        
        // 计算实际索引。
		result := s.freeindex + uintptr(theBit)
		if result < s.nelems {
			freeidx := result + 1
			if freeidx%64 == 0 && freeidx != s.nelems {
				return 0
			}
            
            // 右移缓存，刷新 freeindex。
			s.allocCache >>= uint(theBit + 1)
			s.freeindex = freeidx
			s.allocCount++
            
			return gclinkptr(result*s.elemsize + s.base())
		}
	}
    
	return 0
}

扩容

如果 nextFree 发现 mspan 没有剩余空间，那么从 central 扩容。

// mcache.go

// refill acquires a new span of span class spc for c. This span will
// have at least one free object. The current span in c must be full.

func (c *mcache) refill(spc spanClass) {
    
	// 当前 span，交还 central。
	s := c.alloc[spc]
	if s != &emptymspan {
		mheap_.central[spc].mcentral.uncacheSpan(s)
	}

	// 从 central 获取新的 span.
	s = mheap_.central[spc].mcentral.cacheSpan()
	c.alloc[spc] = s
}

与 mcache 类似， mheap.central 数组同样包含 scan、noscan 两类。

// mheap.go

type mheap struct {
    
	// central free lists for small size classes.
	// central is indexed by spanClass.
	central [numSpanClasses]struct {
		mcentral mcentral
	}
}

每个 mcentral 按是否有剩余空间，管理 partial 和 full 列表。

另外，partial 和 full 都有两个集合，分别表示已清理（swept）和未清理（unswept）。

// mcentral.go

// Central list of free objects of a given size.
type mcentral struct {
	spanclass spanClass

	// partial and full contain two mspan sets: one of swept in-use
	// spans, and one of unswept in-use spans. 
    
	partial [2]spanSet // list of spans with a free object
	full    [2]spanSet // list of spans with no free objects
}

访问时：
swept =partial[sweepgen/2%2]
unswept = partial[1-sweepgen/2%2]
随着回收代龄的增加（+2/per），两个集合角色会互换。
gen 0: swept = 0/2%2 = 0, unswept = 1 - 0/2%2 = 1
gen 2: swept = 2/2%2 = 1, unswept = 1 - 2/2%2 = 0
gen 4: swept = 4/2%2 = 0, unswept = 1 - 4/2%2 = 1

// mheap.go

type mspan struct {
    
	// sweep generation:
	// if sweepgen == h->sweepgen - 2, the span needs sweeping
	// if sweepgen == h->sweepgen - 1, the span is currently being swept
	// if sweepgen == h->sweepgen, the span is swept and ready to use
	// if sweepgen == h->sweepgen + 1, the span was cached before sweep began and is still cached, and needs sweeping
	// if sweepgen == h->sweepgen + 3, the span was swept and then cached and is still cached
	// h->sweepgen is incremented by 2 after every GC

	sweepgen    uint32    
}

// mcentral.go

// partialUnswept returns the spanSet which holds partially-filled
// unswept spans for this sweepgen.
func (c *mcentral) partialUnswept(sweepgen uint32) *spanSet {
	return &c.partial[1-sweepgen/2%2]
}

// partialSwept returns the spanSet which holds partially-filled
// swept spans for this sweepgen.
func (c *mcentral) partialSwept(sweepgen uint32) *spanSet {
	return &c.partial[sweepgen/2%2]
}

扩容时，依次尝试 partial swept、partial unswept、full unswept 集合，直到从 heap 重新分配。

鉴于期间清理操作动作较大，未避免花费太多时间，采取了尝试总次数限制。

// mcentral.go

// Allocate a span to use in an mcache.
func (c *mcentral) cacheSpan() *mspan {

	// 尝试总次数限制。
    // 超出，则直接分配新的 span。
    // 避免查找可用空间花费太多时间。
    spanBudget := 100

	var s *mspan

    // 尝试 1: partial swept。
	sg := mheap_.sweepgen
	if s = c.partialSwept(sg).pop(); s != nil {
		goto havespan
	}

	sl = sweep.active.begin()
	if sl.valid {
        
        // 尝试 2: partial unswept.
		for ; spanBudget >= 0; spanBudget-- {
			s = c.partialUnswept(sg).pop()
			if s == nil {
				break
			}
            
            // 清理。
			if s, ok := sl.tryAcquire(s); ok {
				s.sweep(true)
				sweep.active.end(sl)
				goto havespan
			}
		}
        
        // 尝试 3: full unswept.（已清理的 full，没必要尝试）
		for ; spanBudget >= 0; spanBudget-- {
			s = c.fullUnswept(sg).pop()
			if s == nil {
				break
			}
            
            // 清理。
			if s, ok := sl.tryAcquire(s); ok {
				s.sweep(true)
                
				// 标记可用位置。
				freeIndex := s.nextFreeIndex()
				if freeIndex != s.nelems {
					s.freeindex = freeIndex
					sweep.active.end(sl)
					goto havespan
				}
                
				// 没有可用空间，转移到已清理列表。
				c.fullSwept(sg).push(s.mspan)
			}
		}
        
		sweep.active.end(sl)
	}

	// 依然没有，从 heap 获取。
	s = c.grow()
	if s == nil {
		return nil
	}

havespan:
    
	// 设置 mspan 状态...
    
	return s
}

// grow allocates a new empty span from the heap and initializes it 
// for c's size class.
func (c *mcentral) grow() *mspan {
    
	npages := uintptr(class_to_allocnpages[c.spanclass.sizeclass()])
	size := uintptr(class_to_size[c.spanclass.sizeclass()])

	s := mheap_.alloc(npages, c.spanclass)
	if s == nil {
		return nil
	}
    
	return s
}