記go中一次http超時引發的事故

2021 年 6 月 2 日
筆記
golang

記一次http超時引發的事故

記一次http超時引發的事故

前言

我們使用的是golang標準庫的http client，對於一些http請求，我們在處理的時候，會考慮加上超時時間，防止http請求一直在請求，導致業務長時間阻塞等待。

最近同事寫了一個超時的組件，這幾天訪問量上來了，網絡也出現了波動，結果出現了接口已經報了超時錯誤、但請求實際上仍然執行成功的情況。

分析下具體的代碼實現

// request describes one outgoing HTTP call. doRequest passes method, url and
// value to http.NewRequest; ps carries the per-request options.
type request struct {
	method string
	url    string
	// value is the request body. The literal string "null" makes doRequest
	// send the request without a body.
	value  string
	ps     *params
}

// params holds the per-request options attached to a request.
type params struct {
	timeout     int // timeout; multiplied by time.Millisecond where it is used
	retry       int // retry count; asyncCall uses it as the loop bound on attempts
	headers     map[string]string
	// contentType selects the decoder in Do: "application/xml" decodes as XML,
	// any other value decodes as JSON.
	contentType string
}

// Do sends the request through asyncCall and returns the raw response bytes.
//
// When result is non-nil the response is additionally decoded into it: as XML
// if the request's contentType is "application/xml", as JSON otherwise. A send
// or decode failure yields a nil byte slice together with the error.
func (req *request) Do(result interface{}) ([]byte, error) {
	body, err := asyncCall(doRequest, req)
	if err != nil {
		return nil, err
	}

	// The caller only wants the raw payload.
	if result == nil {
		return body, nil
	}

	// Pick the decoder once; JSON is the fallback for every other content type.
	decode := json.Unmarshal
	if req.ps.contentType == "application/xml" {
		decode = xml.Unmarshal
	}
	if decodeErr := decode(body, result); decodeErr != nil {
		return nil, decodeErr
	}

	return body, nil
}
// timeout carries the outcome of one request attempt (response bytes and
// error) from the goroutine started in asyncCall back over the done channel.
type timeout struct {
	data []byte
	err  error
}


// doRequest builds an *http.Request from request and sends it with a
// zero-value http.Client. This is the first version, shown to illustrate the
// bug; the rest of the body is elided in the article.
func doRequest(request *request) ([]byte, error) {
	var (
		req    *http.Request
		errReq error
	)
	// The literal string "null" in request.value means "send no body".
	if request.value != "null" {
		buf := strings.NewReader(request.value)
		req, errReq = http.NewRequest(request.method, request.url, buf)
		if errReq != nil {
			return nil, errReq
		}
	} else {
		req, errReq = http.NewRequest(request.method, request.url, nil)
		if errReq != nil {
			return nil, errReq
		}
	}
	// The client here has no timeout configured.
	// So when the caller (asyncCall below) detects a timeout, it fires off a
	// brand-new request, but the old request is never closed and keeps running.
	client := http.Client{}
	res, err := client.Do(req)
	...
}

// asyncCall runs f in a goroutine and retries the call: when an attempt does
// not answer within p.timeout milliseconds, a new request is started, up to
// p.retry attempts. This is the first version, shown to illustrate the bug.
// If no attempt answers in time, ecode.TimeoutErr is returned.
func asyncCall(f func(request *request) ([]byte, error), req *request) ([]byte, error) {
	p := req.ps
	// ctx is handed to each goroutine but is never cancelled, and f does not
	// receive it, so it cannot stop a request that is already in flight.
	ctx := context.Background()
	done := make(chan *timeout, 1)

	for i := 0; i < p.retry; i++ {
		go func(ctx context.Context) {
			// Send the HTTP request.
			res, err := f(req)
			done <- &timeout{
				data: res,
				err:  err,
			}
		}(ctx)
		// The main bug is here.
		// With retry set to 3, as soon as the first attempt times out a new
		// request is started right away, but this timeout exit is used
		// incorrectly: the previous request is left running.
		// See doRequest above for details.
		select {
		case res := <-done:
			return res.data, res.err
		case <-time.After(time.Duration(p.timeout) * time.Millisecond):
		}
	}
	return nil, ecode.TimeoutErr
}

錯誤的原因

1、超時重試，之後過了一段時間沒有拿到結果就認為是超時了，但是http請求沒有被關閉；

2、錯誤使用了http的超時，具體的做法要通過context或http.client去實現，見下文；

修改之後的代碼

// doRequest builds an *http.Request from request and sends it with an
// http.Client whose Timeout is request.ps.timeout milliseconds. This is the
// corrected version; the rest of the body is elided in the article.
func doRequest(request *request) ([]byte, error) {
	var (
		req    *http.Request
		errReq error
	)
	// The literal string "null" in request.value means "send no body".
	if request.value != "null" {
		buf := strings.NewReader(request.value)
		req, errReq = http.NewRequest(request.method, request.url, buf)
		if errReq != nil {
			return nil, errReq
		}
	} else {
		req, errReq = http.NewRequest(request.method, request.url, nil)
		if errReq != nil {
			return nil, errReq
		}
	}

	// Set the timeout through http.Client here.
	client := http.Client{
		Timeout: time.Duration(request.ps.timeout) * time.Millisecond,
	}
	res, err := client.Do(req)
	...
}

// asyncCall invokes f with req and retries only after the previous attempt has
// actually ended with a timeout error, so attempts never overlap.
//
// req.ps.retry is the maximum number of attempts. Values below 1 are treated
// as 1, so the request is always sent at least once. Any outcome that is not a
// timeout (success or another error) is returned to the caller immediately.
// If every attempt times out, ecode.TimeoutErr is returned.
func asyncCall(f func(request *request) ([]byte, error), req *request) ([]byte, error) {
	attempts := req.ps.retry
	if attempts < 1 {
		// A non-positive retry count used to skip the loop entirely and report
		// a timeout without ever sending the request.
		attempts = 1
	}

	for i := 0; i < attempts; i++ {
		// Send the HTTP request; f is expected to enforce the timeout itself.
		res, err := f(req)
		// Only a timeout leads to another attempt.
		if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
			continue
		}

		return res, err
	}
	return nil, ecode.TimeoutErr
}

服務設置超時

http.Server有兩個設置超時的方法:

ReadTimeout

ReadTimeout的時間計算是從連接被接受(accept)到request body完全被讀取(如果你不讀取body，那麼時間截止到讀完header為止)

WriteTimeout

WriteTimeout的時間計算正常是從request header的讀取結束開始，到response write結束為止 (也就是ServeHTTP方法的生命周期)

// Configure the two server-side timeouts described above.
srv := &http.Server{  
    ReadTimeout: 5 * time.Second,
    WriteTimeout: 10 * time.Second,
}

// NOTE(review): the error returned by ListenAndServe is discarded here.
srv.ListenAndServe()

net/http包還提供了TimeoutHandler返回了一個在給定的時間限制內運行的handler

func TimeoutHandler(h Handler, dt time.Duration, msg string) Handler

第一個參數是Handler，第二個參數是time.Duration（超時時間），第三個參數是string類型，當到達超時時間後返回的信息

// handler simulates a slow endpoint: it sleeps for three seconds, prints a
// marker line to stdout and then writes "hello world" to the response.
func handler(w http.ResponseWriter, r *http.Request) {
	const delay = 3 * time.Second
	time.Sleep(delay)

	fmt.Println("測試超時")

	payload := []byte("hello world")
	w.Write(payload)
}

// server serves handler on :8081, wrapped in http.TimeoutHandler with a
// five-second limit and the message "Timeout!\n", on an http.Server whose
// WriteTimeout is one second. The process exits with status 1 when
// ListenAndServe returns an error.
func server() {
	wrapped := http.TimeoutHandler(http.HandlerFunc(handler), 5*time.Second, "Timeout!\n")

	srv := http.Server{
		Addr:         ":8081",
		WriteTimeout: 1 * time.Second,
		Handler:      wrapped,
	}

	err := srv.ListenAndServe()
	if err != nil {
		os.Exit(1)
	}
}

客戶端設置超時

http.client

最簡單的我們通過http.Client的Timeout字段，就可以實現客戶端的超時控制

http.client超時是超時的高層實現，包含了從Dial到Response Body的整個請求流程。http.client的實現提供了一個結構體類型可以接受一個額外的time.Duration類型的Timeout屬性。這個參數定義了從請求開始到響應消息體被完全接收的時間限制。

// httpClientTimeout issues a GET request with a client whose Timeout is three
// seconds, then prints the response and the error.
func httpClientTimeout() {
	c := &http.Client{
		Timeout: 3 * time.Second,
	}

	// The URL must carry an explicit scheme; "//host/path" is rejected by the
	// client with an "unsupported protocol scheme" error before any request
	// is sent.
	resp, err := c.Get("http://127.0.0.1:8081/test")
	if err == nil {
		// Close the body so the underlying connection is released.
		defer resp.Body.Close()
	}
	fmt.Println(resp)
	fmt.Println(err)
}

context

net/http中的Request攜帶了context（可以通過Request.WithContext設置），所以我們可以藉助於context本身的超時機制，實現http中request的超時處理

// contextTimeout issues a GET request bound to a context that expires after
// three seconds, then prints the response and the error.
func contextTimeout() {
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	// The URL must carry an explicit scheme; "//host/path" is rejected by the
	// client with an "unsupported protocol scheme" error before any request
	// is sent.
	req, err := http.NewRequest("GET", "http://127.0.0.1:8081/test", nil)
	if err != nil {
		log.Fatal(err)
	}

	resp, err := http.DefaultClient.Do(req.WithContext(ctx))
	if err == nil {
		// Close the body so the underlying connection is released.
		defer resp.Body.Close()
	}
	fmt.Println(resp)
	fmt.Println(err)
}

使用context的優點就是，當父context被取消時，子context就會層層退出。

http.Transport

通過Transport還可以進行一些更小維度的超時設置

net.Dialer.Timeout 限制建立TCP連接的時間
http.Transport.TLSHandshakeTimeout 限制 TLS握手的時間
http.Transport.ResponseHeaderTimeout 限制讀取response header的時間
http.Transport.ExpectContinueTimeout 限制client在發送包含 Expect: 100-continue的header到收到繼續發送body的response之間的時間等待。注意在1.6中設置這個值會禁用HTTP/2(DefaultTransport自1.6.2起是個特例)

// transportTimeout issues a GET request through a Transport whose
// ResponseHeaderTimeout is three seconds, then prints the response and the
// error.
func transportTimeout() {
	transport := &http.Transport{
		DialContext:           (&net.Dialer{}).DialContext,
		ResponseHeaderTimeout: 3 * time.Second,
	}

	c := http.Client{Transport: transport}

	// The URL must carry an explicit scheme; "//host/path" is rejected by the
	// client with an "unsupported protocol scheme" error before any request
	// is sent.
	resp, err := c.Get("http://127.0.0.1:8081/test")
	if err == nil {
		// Close the body so the underlying connection is released.
		defer resp.Body.Close()
	}
	fmt.Println(resp)
	fmt.Println(err)
}

問題

如果客戶端在超時的臨界點觸發了超時機制，而這時候服務端剛好也接收到了http的請求，

這種情況下服務端還是可以拿到請求的數據，所以對於超時時間的設置我們需要根據實際情況進行權衡，同時我們要考慮接口的冪等性。

總結

1、所有的超時實現都是基於Deadline。Deadline是一個時間的絕對值，一旦設置就會一直生效，不管此時連接是否被使用和怎麼用；它不會自動重置，所以如果想使用SetDeadline建立超時機制，需要每次在Read/Write操作之前手動調用它。

2、使用context進行超時控制的好處就是，當父context超時的時候，子context就會層層退出。

參考

【[譯]Go net/http 超時機制完全手冊】//colobu.com/2016/07/01/the-complete-guide-to-golang-net-http-timeouts/
【Go 語言 HTTP 請求超時入門】//studygolang.com/articles/14405
【使用 timeout、deadline 和 context 取消參數使 Go net/http 服務更靈活】//jishuin.proginn.com/p/763bfbd2fb6a

Tags: golang