優雅退出在Golang中的實現
背景
為什麼需要優雅關停
- 前台啟動。打開終端,在終端中直接啟動某個進程,此時終端被阻塞;按CTRL+C退出程序後,才可以輸入其他命令。關閉終端後,程序也會跟着退出。
$ ./main
$ # 按CTRL+C退出
- 後台啟動。打開終端,以nohup來後台啟動某個進程,這樣退出終端後,進程仍然會後台運行。
$ nohup ./main > log.out 2>&1 &
$ ps aux | grep main
# 需要使用 kill 殺死進程
$ kill 8120
針對上面2種情況,如果你的程序正在寫文件(或者其他很重要,需要一點時間停止的事情),此時被操作系統強制殺掉,因為寫緩衝區的數據還沒有被刷到磁盤,所以你在內存中的那部分數據丟失了。
實現原理
- 比如上面你在終端中按 `CTRL+C` 後,程序會收到 `SIGINT` 信號。
- 打開的終端被關閉,會收到 `SIGHUP` 信號。
- kill 8120 殺死某個進程,會收到 `SIGTERM` 信號。

入門例子
代碼
package main
import (
"fmt"
"os"
"os/signal"
"syscall"
"time"
)
// waitElegantExit blocks the calling goroutine until one of the exit signals
// (SIGHUP, SIGINT, SIGTERM, SIGQUIT) arrives on signalChan, then prints a
// notice and terminates the process with exit status 0. Any other signal is
// ignored. It returns without exiting only if signalChan is closed.
func waitElegantExit(signalChan chan os.Signal) {
	// Range over the signalChan parameter itself, not a caller-side variable.
	for sig := range signalChan {
		switch sig {
		case syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT:
			// Do cleanup work or print a notice here, e.g. close database connections.
			fmt.Println("receive exit signal ", sig.String(), ",exit...")
			os.Exit(0)
		}
	}
}
// main prints the listen address, subscribes to the process exit signals and
// then blocks in waitElegantExit until one of them arrives.
func main() {
	//
	// Your business logic goes here.
	//
	fmt.Println("server run on: 127.0.0.1:8000")

	// signal.Notify never blocks when delivering, so with an unbuffered
	// channel a signal that arrives before the receiver is ready is dropped.
	// A buffer of one is enough for a one-shot exit notification.
	c := make(chan os.Signal, 1)
	// SIGHUP: terminal closed
	// SIGINT: Ctrl+C
	// SIGTERM: sent by a plain kill <pid>
	// SIGQUIT: Ctrl+\
	signal.Notify(c, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)

	// Block until an exit signal is received, then stop the process.
	waitElegantExit(c)
}
詳解
// Receive loop shown for explanation: every iteration waits for one signal
// and exits the process when it is one of the four exit signals.
for {
// Receive a signal from the channel; this blocks until one is delivered.
i := <-c
switch i {
case syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT:
fmt.Println("receive exit signal ", i.String(), ",exit...")
// NOTE(review): exit is not defined in this excerpt — presumably the
// caller-supplied cleanup callback (as in WaitExit); confirm.
exit()
os.Exit(0)
}
}
效果
server run on: 127.0.0.1:8000
# mac/linux 上按Ctrl+C,windows上調試運行,然後點擊停止
receive exit signal interrupt ,exit...
Process finished with exit code 0
實戰
封裝
package osutils
import (
"fmt"
"os"
"os/signal"
"syscall"
)
// WaitExit blocks until an exit signal (SIGHUP, SIGINT, SIGTERM or SIGQUIT)
// is received on c. It then prints a notice, runs the exit callback and
// terminates the process with status 0. Other signals are ignored, and the
// function returns normally if c is closed.
func WaitExit(c chan os.Signal, exit func()) {
	for {
		sig, open := <-c
		if !open {
			// Channel closed: nothing more can arrive.
			return
		}

		isExit := sig == syscall.SIGHUP ||
			sig == syscall.SIGINT ||
			sig == syscall.SIGTERM ||
			sig == syscall.SIGQUIT
		if !isExit {
			continue
		}

		fmt.Println("receive exit signal ", sig.String(), ",exit...")
		exit()
		os.Exit(0)
	}
}
// NewShutdownSignal returns a channel that receives the common process
// shutdown signals: SIGHUP, SIGINT, SIGTERM and SIGQUIT.
//
// The channel is buffered with capacity 1: signal.Notify does not block when
// delivering, so with an unbuffered channel a signal that arrives before the
// receiver is waiting would be dropped.
func NewShutdownSignal() chan os.Signal {
	c := make(chan os.Signal, 1)
	// SIGHUP: terminal closed
	// SIGINT: Ctrl+C
	// SIGTERM: sent by a plain kill <pid>
	// SIGQUIT: Ctrl+\
	signal.Notify(c, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
	return c
}
http server的例子
package main
import (
"context"
"fmt"
"github.com/gin-gonic/gin"
"net/http"
"os"
"os/signal"
"syscall"
"time"
)
// Recover is meant to be invoked through defer. It runs every cleanup in the
// order given and then, if the goroutine is panicking, stops the panic and
// prints the recovered value.
func Recover(cleanups ...func()) {
	for idx := 0; idx < len(cleanups); idx++ {
		cleanups[idx]()
	}

	// recover only takes effect when called directly by the deferred function.
	recovered := recover()
	if recovered == nil {
		return
	}
	fmt.Println("recover error", recovered)
}
// GoSafe runs fn(ctx) on a new goroutine with Recover deferred, so a panic
// raised by fn is recovered on that goroutine. Use it in place of a bare
// "go func()". A nil fn results in a goroutine that does nothing.
func GoSafe(ctx context.Context, fn func(ctx context.Context)) {
	run := func() {
		defer Recover()
		if fn == nil {
			return
		}
		fn(ctx)
	}
	go run()
}
// main starts a gin HTTP server on 127.0.0.1:8060, launches a deliberately
// panicking background goroutine through GoSafe, and then blocks until an
// exit signal arrives, at which point the HTTP server is shut down gracefully.
func main() {
	// Upper bound for the graceful shutdown of the HTTP server.
	const shutdownTimeout = 5 * time.Second

	// a gin http server
	gin.SetMode(gin.ReleaseMode)
	g := gin.Default()
	// The parameter is named c so that it does not shadow the context package.
	g.GET("/hello", func(c *gin.Context) {
		// This panic is caught on gin's request-handling goroutine
		// (gin.Default installs a recovery middleware).
		panic("i am panic")
	})
	httpSrv := &http.Server{
		Addr:    "127.0.0.1:8060",
		Handler: g,
	}
	fmt.Println("server run on:", httpSrv.Addr)
	GoSafe(context.Background(), func(ctx context.Context) {
		// ListenAndServe always returns a non-nil error. ErrServerClosed is the
		// expected result of Shutdown; anything else (e.g. the port is already
		// in use) is a real failure and must not be dropped silently.
		if err := httpSrv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			fmt.Println("http server error:", err.Error())
		}
	})

	// a custom dangerous go routine, 10s later it panics; GoSafe recovers it
	// so the app keeps running.
	GoSafe(context.Background(), func(ctx context.Context) {
		time.Sleep(time.Second * 10)
		panic("dangerous")
	})

	// wait until exit
	signalChan := NewShutdownSignal()
	WaitExit(signalChan, func() {
		// your clean code
		// Bound the shutdown so a stuck connection cannot block the exit forever.
		ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
		defer cancel()
		if err := httpSrv.Shutdown(ctx); err != nil {
			fmt.Println(err.Error())
		}
		fmt.Println("http server closed")
	})
}
server run on: 127.0.0.1:8060
^Creceive exit signal interrupt ,exit...
http server closed
Process finished with the exit code 0
陷阱和最佳實踐
server run on: 127.0.0.1:8060
panic: dangerous
goroutine 21 [running]:
main.main.func2()
/Users/fei.xu/repo/haoshuo/ws-gate/app/test/main.go:77 +0x40
created by main.main
/Users/fei.xu/repo/haoshuo/ws-gate/app/test/main.go:75 +0x250
Process finished with the exit code 2
// A deliberately dangerous goroutine: started with a bare go statement, its
// panic 10s later crashes the whole app.
//go func() {
// time.Sleep(time.Second * 10)
// panic("dangerous")
//}()
// Use the GoSafe call below instead of the bare goroutine above!
GoSafe(context.Background(), func(ctx context.Context) {
time.Sleep(time.Second * 10)
panic("dangerous")
})
通過查看go panic(runtime/panic.go)部分源碼:
// gopanic is an abridged excerpt of the Go runtime's panic entry point
// (runtime/panic.go). It records the new panic on the current goroutine, runs
// that goroutine's deferred calls one by one, hands control to recovery if a
// deferred call recovered the panic, and otherwise ends in fatalpanic.
func gopanic(e interface{}) {
gp := getg()
var p _panic
p.arg = e
p.link = gp._panic // link p to the earlier panic (if any) on this goroutine
gp._panic = (*_panic)(noescape(unsafe.Pointer(&p)))
atomic.Xadd(&runningPanicDefers, 1)
// Walk the goroutine's defer list.
for {
d := gp._defer
if d == nil {
break
}
// If this defer was already started, unlink and free it, then move on.
if d.started {
gp._defer = d.link
freedefer(d) // release the defer record
continue
}
// Mark the defer as started.
d.started = true
// Record that the current panic is running this defer. Per the article
// author, a new panic raised while the defer runs would mark this panic as
// aborted (that code is not part of this excerpt).
d._panic = (*_panic)(noescape(unsafe.Pointer(&p)))
p.argp = unsafe.Pointer(getargp(0))
// Invoke the deferred function.
reflectcall(nil, unsafe.Pointer(d.fn), deferArgs(d), uint32(d.siz), uint32(d.siz))
p.argp = nil
// reflectcall did not panic. Remove d.
if gp._defer != d {
throw("bad defer entry in panic")
}
d._panic = nil
d.fn = nil
gp._defer = d.link // advance to the next defer
pc := d.pc
sp := unsafe.Pointer(d.sp) // must be pointer so it gets adjusted during stack copy
freedefer(d)
// A deferred call has invoked recover.
if p.recovered {
// Switch to the recovery routine.
mcall(recovery)
throw("recovery failed") // mcall should not return
}
}
// All defers have run without a recover: terminate the program.
fatalpanic(gp._panic) // should not return
*(*int)(nil) = 0 // not reached
}
// fatalpanic runs when a panic was not recovered. In this abridged excerpt it
// prints the panic messages and then terminates the process via exit(2).
func fatalpanic(msgs *_panic) {
systemstack(func() {
if startpanic_m() && msgs != nil {
// Print the panic messages.
printpanics(msgs)
}
// NOTE(review): the article labels this line "print panic messages" as well;
// dopanic_m, gp, pc, sp and docrash are not defined in this excerpt —
// confirm against runtime/panic.go.
docrash = dopanic_m(gp, pc, sp)
})
// Terminate the whole program. Note: a panic that is not recovered in any
// goroutine therefore takes down the entire process.
systemstack(func() {
exit(2)
})
*(*int)(nil) = 0 // not reached
}
我們可以確定,當panic沒有被處理時,runtime 會調用 exit(2) 退出整個應用程序!
package threading
import (
"bytes"
"runtime"
"strconv"
"github.com/zeromicro/go-zero/core/rescue"
)
// GoSafe starts a new goroutine that executes fn through RunSafe, so a panic
// raised by fn is handled by RunSafe's deferred recovery.
func GoSafe(fn func()) {
	go func() {
		RunSafe(fn)
	}()
}
// RoutineId returns the numeric id of the calling goroutine, parsed from the
// "goroutine N ..." header line produced by runtime.Stack. It exists for
// debugging only; never use it in production. A header whose id cannot be
// parsed yields 0.
func RoutineId() uint64 {
	var stack [64]byte
	n := runtime.Stack(stack[:], false)

	// Drop the fixed prefix so the buffer starts with the decimal id.
	header := bytes.TrimPrefix(stack[:n], []byte("goroutine "))
	end := bytes.IndexByte(header, ' ')

	// The parse error is ignored on purpose: ParseUint yields 0 on failure.
	id, _ := strconv.ParseUint(string(header[:end]), 10, 64)
	return id
}
// RunSafe runs the given fn, recovers if fn panics.
// rescue.Recover is registered with defer before fn is called, so it runs
// when fn returns or panics.
// NOTE(review): rescue.Recover presumably calls recover() itself, which only
// works while it is the directly deferred function — do not wrap it in
// another closure; confirm against go-zero's rescue package.
func RunSafe(fn func()) {
defer rescue.Recover()
fn()
}
總結
至此,我們介紹了什麼是優雅退出,以及在Linux下幾種常見的退出場景,並給出了Go的入門代碼例子和最佳實踐。
在文章的最後,特別是對Linux C++ 轉go的同學進行了一個提醒:go panic的時候,是不會收到退出信號的,因為它是程序自己主動退出(go runtime),而不是因為非法訪問內存被操作系統殺掉。
針對上面這個問題,給出的建議是,謹慎使用原生go關鍵字,最佳實踐是封裝一個GoSafe函數,在裏面進行 recover() 和打印堆棧,這樣,就不會出現因為忘記 recover 而導致進程崩潰了!
—- The End —-
如有任何想法或者建議,歡迎評論區留言😊。
——————傳說中的分割線——————
大家好,我目前已從C++後端轉型為Golang後端,可以訂閱關注下《Go和分佈式IM》公眾號,獲取一名轉型萌新Gopher的心路成長歷程和升級打怪技巧。


