span的sweep函数用于清扫单个span:
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
// Returns true if the span was returned to heap.
// If preserve=true, don't return it to heap nor relink in MCentral lists;
// caller takes care of it.
//TODO go:nowritebarrier
func (s *mspan) sweep(preserve bool) bool {
// It's critical that we enter this function with preemption disabled,
// GC must not start while we are in the middle of this function.
_g_ := getg()
if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
throw("MSpan_Sweep: m is not locked")
}
sweepgen := mheap_.sweepgen
if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
throw("MSpan_Sweep: bad span state")
}
if trace.enabled {
traceGCSweepSpan(s.npages * _PageSize)
}
// 统计已清理的页数
atomic.Xadd64(&mheap_.pagesSwept, int64(s.npages))
spc := s.spanclass
size := s.elemsize
res := false
c := _g_.m.mcache
freeToHeap := false
// The allocBits indicate which unmarked objects don't need to be
// processed since they were free at the end of the last GC cycle
// and were not allocated since then.
// If the allocBits index is >= s.freeindex and the bit
// is not marked then the object remains unallocated
// since the last GC.
// This situation is analogous to being on a freelist.
// 判断在special中的析构器, 如果对应的对象已经不再存活则标记对象存活防止回收, 然后把析构器移到运行队列
// Unlink & free special records for any objects we're about to free.
// Two complications here:
// 1. An object can have both finalizer and profile special records.
// In such case we need to queue finalizer for execution,
// mark the object as live and preserve the profile special.
// 2. A tiny object can have several finalizers setup for different offsets.
// If such object is not marked, we need to queue all finalizers at once.
// Both 1 and 2 are possible at the same time.
specialp := &s.specials
special := *specialp
for special != nil {
// A finalizer can be set for an inner byte of an object, find object beginning.
objIndex := uintptr(special.offset) / size
p := s.base() + objIndex*size
mbits := s.markBitsForIndex(objIndex)
if !mbits.isMarked() {
// This object is not marked and has at least one special record.
// Pass 1: see if it has at least one finalizer.
hasFin := false
endOffset := p - s.base() + size
for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next {
if tmp.kind == _KindSpecialFinalizer {
// Stop freeing of object if it has a finalizer.
mbits.setMarkedNonAtomic()
hasFin = true
break
}
}
// Pass 2: queue all finalizers _or_ handle profile record.
for special != nil && uintptr(special.offset) < endOffset {
// Find the exact byte for which the special was setup
// (as opposed to object beginning).
p := s.base() + uintptr(special.offset)
if special.kind == _KindSpecialFinalizer || !hasFin {
// Splice out special record.
y := special
special = special.next
*specialp = special
freespecial(y, unsafe.Pointer(p), size)
} else {
// This is profile record, but the object has finalizers (so kept alive).
// Keep special record.
specialp = &special.next
special = *specialp
}
}
} else {
// object is still live: keep special record
specialp = &special.next
special = *specialp
}
}
// 除错用
if debug.allocfreetrace != 0 || raceenabled || msanenabled {
// Find all newly freed objects. This doesn't have to
// efficient; allocfreetrace has massive overhead.
mbits := s.markBitsForBase()
abits := s.allocBitsForIndex(0)
for i := uintptr(0); i < s.nelems; i++ {
if !mbits.isMarked() && (abits.index < s.freeindex || abits.isMarked()) {
x := s.base() + i*s.elemsize
if debug.allocfreetrace != 0 {
tracefree(unsafe.Pointer(x), size)
}
if raceenabled {
racefree(unsafe.Pointer(x), size)
}
if msanenabled {
msanfree(unsafe.Pointer(x), size)
}
}
mbits.advance()
abits.advance()
}
}
// 计算释放的对象数量
// Count the number of free objects in this span.
nalloc := uint16(s.countAlloc())
if spc.sizeclass() == 0 && nalloc == 0 {
// 如果span的类型是0(大对象)并且其中的对象已经不存活则释放到heap
s.needzero = 1
freeToHeap = true
}
nfreed := s.allocCount - nalloc
if nalloc > s.allocCount {
print("runtime: nelems=", s.nelems, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n")
throw("sweep increased allocation count")
}
// 设置新的allocCount
s.allocCount = nalloc
// 判断span是否无未分配的对象
wasempty := s.nextFreeIndex() == s.nelems
// 重置freeindex, 下次分配从0开始搜索
s.freeindex = 0 // reset allocation index to start of span.
if trace.enabled {
getg().m.p.ptr().traceReclaimed += uintptr(nfreed) * s.elemsize
}
// gcmarkBits变为新的allocBits
// 然后重新分配一块全部为0的gcmarkBits
// 下次分配对象时可以根据allocBits得知哪些元素是未分配的
// gcmarkBits becomes the allocBits.
// get a fresh cleared gcmarkBits in preparation for next GC
s.allocBits = s.gcmarkBits
s.gcmarkBits = newMarkBits(s.nelems)
// 更新freeindex开始的allocCache
// Initialize alloc bits cache.
s.refillAllocCache(0)
// 如果span中已经无存活的对象则更新sweepgen到最新
// 下面会把span加到mcentral或者mheap
// We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
// because of the potential for a concurrent free/SetFinalizer.
// But we need to set it before we make the span available for allocation
// (return it to heap or mcentral), because allocation code assumes that a
// span is already swept if available for allocation.
if freeToHeap || nfreed == 0 {
// The span must be in our exclusive ownership until we update sweepgen,
// check for potential races.
if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
throw("MSpan_Sweep: bad span state after sweep")
}
// Serialization point.
// At this point the mark bits are cleared and allocation ready
// to go so release the span.
atomic.Store(&s.sweepgen, sweepgen)
}
if nfreed > 0 && spc.sizeclass() != 0 {
// 把span加到mcentral, res等于是否添加成功
c.local_nsmallfree[spc.sizeclass()] += uintptr(nfreed)
res = mheap_.central[spc].mcentral.freeSpan(s, preserve, wasempty)
// freeSpan会更新sweepgen
// MCentral_FreeSpan updates sweepgen
} else if freeToHeap {
// 把span释放到mheap
// Free large span to heap
// NOTE(rsc,dvyukov): The original implementation of efence
// in CL 22060046 used SysFree instead of SysFault, so that
// the operating system would eventually give the memory
// back to us again, so that an efence program could run
// longer without running out of memory. Unfortunately,
// calling SysFree here without any kind of adjustment of the
// heap data structures means that when the memory does
// come back to us, we have the wrong metadata for it, either in
// the MSpan structures or in the garbage collection bitmap.
// Using SysFault here means that the program will run out of
// memory fairly quickly in efence mode, but at least it won't
// have mysterious crashes due to confused memory reuse.
// It should be possible to switch back to SysFree if we also
// implement and then call some kind of MHeap_DeleteSpan.
if debug.efence > 0 {
s.limit = 0 // prevent mlookup from finding this span
sysFault(unsafe.Pointer(s.base()), size)
} else {
mheap_.freeSpan(s, 1)
}
c.local_nlargefree++
c.local_largefree += size
res = true
}
// 如果span未加到mcentral或者未释放到mheap, 则表示span仍在使用
if !res {
// 把仍在使用的span加到sweepSpans的"已清扫"队列中
// The span has been swept and is still in-use, so put
// it on the swept in-use list.
mheap_.sweepSpans[sweepgen/2%2].push(s)
}
return res
}
从bgsweep和前面的分配器可以看出扫描阶段的工作是十分懒惰(lazy)的, 实际可能会出现前一阶段的扫描还未完成, 就需要开始新一轮的GC的情况, 所以每一轮GC开始之前都需要完成前一轮GC的扫描工作(Sweep Termination阶段).
GC的整个流程都分析完毕了, 最后贴上写屏障函数writebarrierptr的实现:
// NOTE: Really dst *unsafe.Pointer, src unsafe.Pointer,
// but if we do that, Go inserts a write barrier on *dst = src.
//go:nosplit
func writebarrierptr(dst *uintptr, src uintptr) {
if writeBarrier.cgo {
cgoCheckWriteBarrier(dst, src)
}
if !writeBarrier.needed {
*dst = src
return
}
if src != 0 && src < minPhysPageSize {
systemstack(func() {
print("runtime: writebarrierptr *", dst, " = ", hex(src), "\n")
throw("bad pointer in write barrier")
})
}
// 标记指针
writebarrierptr_prewrite1(dst, src)
// 设置指针到目标
*dst = src
}
writebarrierptr_prewrite1函数如下:
// writebarrierptr_prewrite1 invokes a write barrier for *dst = src
// prior to the write happening.
//
// Write barrier calls must not happen during critical GC and scheduler
// related operations. In particular there are times when the GC assumes
// that the world is stopped but scheduler related code is still being
// executed, dealing with syscalls, dealing with putting gs on runnable
// queues and so forth. This code cannot execute write barriers because
// the GC might drop them on the floor. Stopping the world involves removing
// the p associated with an m. We use the fact that m.p == nil to indicate
// that we are in one these critical section and throw if the write is of
// a pointer to a heap object.
//go:nosplit
func writebarrierptr_prewrite1(dst *uintptr, src uintptr) {
mp := acquirem()
if mp.inwb || mp.dying > 0 {
releasem(mp)
return
}
systemstack(func() {
if mp.p == 0 && memstats.enablegc && !mp.inwb && inheap(src) {
throw("writebarrierptr_prewrite1 called with mp.p == nil")
}
mp.inwb = true
gcmarkwb_m(dst, src)
})
mp.inwb = false
releasem(mp)
}
gcmarkwb_m函数如下:
func gcmarkwb_m(slot *uintptr, ptr uintptr) {
if writeBarrier.needed {
// Note: This turns bad pointer writes into bad
// pointer reads, which could be confusing. We avoid
// reading from obviously bad pointers, which should
// take care of the vast majority of these. We could
// patch this up in the signal handler, or use XCHG to
// combine the read and the write. Checking inheap is
// insufficient since we need to track changes to
// roots outside the heap.
//
// Note: profbuf.go omits a barrier during signal handler
// profile logging; that's safe only because this deletion barrier exists.
// If we remove the deletion barrier, we'll have to work out
// a new way to handle the profile logging.
if slot1 := uintptr(unsafe.Pointer(slot)); slot1 >= minPhysPageSize {
if optr := *slot; optr != 0 {
// 标记旧指针
shade(optr)
}
}
// TODO: Make this conditional on the caller's stack color.
if ptr != 0 && inheap(ptr) {
// 标记新指针
shade(ptr)
}
}
}
shade函数如下:
// Shade the object if it isn't already.
// The object is not nil and known to be in the heap.
// Preemption must be disabled.
//go:nowritebarrier
func shade(b uintptr) {
if obj, hbits, span, objIndex := heapBitsForObject(b, 0, 0); obj != 0 {
gcw := &getg().m.p.ptr().gcw
// 标记一个对象存活, 并把它加到标记队列(该对象变为灰色)
greyobject(obj, 0, 0, hbits, span, gcw, objIndex)
// 如果标记了禁止本地标记队列则flush到全局标记队列
if gcphase == _GCmarktermination || gcBlackenPromptly {
// Ps aren't allowed to cache work during mark
// termination.
gcw.dispose()
}
}
}
参考链接
https://github.com/golang/go
https://making.pusher.com/golangs-real-time-gc-in-theory-and-practice
https://github.com/golang/proposal/blob/master/design/17503-eliminate-rescan.md
https://golang.org/s/go15gcpacing
https://golang.org/ref/mem
https://talks.golang.org/2015/go-gc.pdf
https://docs.google.com/document/d/1ETuA2IOmnaQ4j81AtTGT40Y4_Jr6_IDASEKg0t0dBR8/edit#heading=h.x4kziklnb8fr
https://go-review.googlesource.com/c/go/+/21503
http://www.cnblogs.com/diegodu/p/5803202.html
http://legendtkl.com/2017/04/28/golang-gc
https://lengzzz.com/note/gc-in-golang
Golang的GC和CoreCLR的GC对比
因为我之前已经对CoreCLR的GC做过分析(看这一篇和这一篇), 这里我可以简单的对比一下CoreCLR和GO的GC实现:
CoreCLR的对象带有类型信息, GO的对象不带, 而是通过bitmap区域记录哪些地方包含指针
CoreCLR分配对象的速度明显更快, GO分配对象需要查找span和写入bitmap区域
CoreCLR的收集器需要做的工作比GO多很多
CoreCLR不同大小的对象都会放在一个segment中, 只能线性扫描
CoreCLR判断对象引用要访问类型信息, 而go只需要访问bitmap
CoreCLR清扫时要一个个去标记为自由对象, 而go只需要切换allocBits
CoreCLR的停顿时间比GO要长
虽然CoreCLR支持并行GC, 但是没有GO彻底, GO连扫描根对象都不需要完全停顿
CoreCLR支持分代GC
虽然Full GC时CoreCLR的效率不如GO, 但是CoreCLR可以在大部分时候只扫描第0和第1代的对象
因为支持分代GC, 通常CoreCLR花在GC上的CPU时间会比GO要少
CoreCLR的分配器和收集器通常比GO要高效, 也就是说CoreCLR会有更高的吞吐量.
但CoreCLR的最大停顿时间不如GO短, 这是因为GO的GC整个设计都是为了减少停顿时间.
现在分布式计算和横向扩展越来越流行,
比起追求单机吞吐量, 追求低延迟然后让分布式解决吞吐量问题无疑是更明智的选择,
GO的设计目标使得它比其他语言都更适合编写网络服务程序.
版权申明:内容来源网络,版权归原创者所有。除非无法确认,我们都会标明作者及出处,如有侵权烦请告知,我们会立即删除并表示歉意。谢谢。