记go中一次http超时引发的事故

2021 年 6 月 2 日
筆記
golang

记一次http超时引发的事故

记一次http超时引发的事故

前言

我们使用的是golang标准库的http client，对于一些http请求，我们在处理的时候，会考虑加上超时时间，防止http请求一直在请求，导致业务长时间阻塞等待。

最近同事写了一个超时的组件，这几天访问量上来了，网络也出现了波动，结果出现了这样的问题：接口已经返回了超时错误，但请求实际上还是执行成功了。

分析下具体的代码实现

// request describes a single outgoing HTTP call together with its options.
type request struct {
	// method and url are passed as-is to http.NewRequest.
	method string
	url    string
	// value is the request body; the literal string "null" means "no body".
	value  string
	// ps holds the per-request options (timeout, retry count, content type, ...).
	ps     *params
}

// params holds the tunable options of a request.
type params struct {
	timeout     int // timeout in milliseconds (callers multiply it by time.Millisecond)
	retry       int // upper bound on the number of attempts made by asyncCall
	headers     map[string]string // presumably extra request headers; not used in the code shown here
	contentType string // "application/xml" selects XML decoding in Do; anything else uses JSON
}

// Do executes the request through asyncCall and returns the raw response body.
//
// When result is non-nil the body is additionally decoded into it: as XML if
// the configured content type is "application/xml", as JSON otherwise. Any
// transport or decoding error is returned with a nil body.
func (req *request) Do(result interface{}) ([]byte, error) {
	body, err := asyncCall(doRequest, req)
	if err != nil {
		return nil, err
	}

	// The caller only wants the raw bytes.
	if result == nil {
		return body, nil
	}

	// JSON is the default; XML is used only for the exact XML content type.
	decode := json.Unmarshal
	if req.ps.contentType == "application/xml" {
		decode = xml.Unmarshal
	}
	if err := decode(body, result); err != nil {
		return nil, err
	}

	return body, nil
}
// timeout is the message sent over the done channel in asyncCall: it carries
// the outcome (body and error) of one call to the request function.
type timeout struct {
	data []byte
	err  error
}


// doRequest builds an *http.Request from request and sends it with a fresh
// http.Client. This is the original (faulty) version: the client has no
// timeout of its own. The rest of the body is elided in the article.
func doRequest(request *request) ([]byte, error) {
	var (
		req    *http.Request
		errReq error
	)
	// The literal string "null" means the request carries no body.
	if request.value != "null" {
		buf := strings.NewReader(request.value)
		req, errReq = http.NewRequest(request.method, request.url, buf)
		if errReq != nil {
			return nil, errReq
		}
	} else {
		req, errReq = http.NewRequest(request.method, request.url, nil)
		if errReq != nil {
			return nil, errReq
		}
	}
	// The client here has no timeout configured.
	// So when the caller detects a timeout, it fires a brand-new request,
	// but the old request is never closed and keeps running.
	client := http.Client{}
	res, err := client.Do(req)
	...
}

// asyncCall calls f with retries.
// When an attempt times out, a new request is started.
//
// This is the original (faulty) version: each attempt runs f in its own
// goroutine and the loop waits at most p.timeout milliseconds for a result on
// done. A timed-out attempt is not cancelled, so its goroutine keeps running,
// and its result may be delivered on done during a later iteration or after
// the function has returned.
func asyncCall(f func(request *request) ([]byte, error), req *request) ([]byte, error) {
	p := req.ps
	ctx := context.Background()
	// Buffer of one: only a single pending result fits without a receiver.
	done := make(chan *timeout, 1)

	for i := 0; i < p.retry; i++ {
		// ctx is passed in but never handed to f, so it cannot cancel the call.
		go func(ctx context.Context) {
			// Send the HTTP request.
			res, err := f(req)
			done <- &timeout{
				data: res,
				err:  err,
			}
		}(ctx)
		// The main mistake is here.
		// With retry set to 3, as soon as the first attempt times out a new
		// request is fired, but this timeout only stops waiting; it does not
		// stop the request. See doRequest above.
		select {
		case res := <-done:
			return res.data, res.err
		case <-time.After(time.Duration(p.timeout) * time.Millisecond):
		}
	}
	return nil, ecode.TimeoutErr
}

错误的原因

1、超时重试，之后过了一段时间没有拿到结果就认为是超时了，但是http请求没有被关闭；

2、错误使用了http的超时，具体的做法要通过context或http.client去实现，见下文；

修改之后的代码

// doRequest builds an *http.Request from request and sends it with an
// http.Client whose Timeout is request.ps.timeout milliseconds. This is the
// corrected version. The rest of the body is elided in the article.
func doRequest(request *request) ([]byte, error) {
	var (
		req    *http.Request
		errReq error
	)
	// The literal string "null" means the request carries no body.
	if request.value != "null" {
		buf := strings.NewReader(request.value)
		req, errReq = http.NewRequest(request.method, request.url, buf)
		if errReq != nil {
			return nil, errReq
		}
	} else {
		req, errReq = http.NewRequest(request.method, request.url, nil)
		if errReq != nil {
			return nil, errReq
		}
	}

	// The timeout is now enforced by http.Client itself.
	client := http.Client{
		Timeout: time.Duration(request.ps.timeout) * time.Millisecond,
	}
	res, err := client.Do(req)
	...
}

// asyncCall invokes f synchronously, up to req.ps.retry times.
//
// An attempt is repeated only when it failed with a net.Error that reports a
// timeout, so a new request is started only after the previous one has really
// timed out. Any other outcome (success or a non-timeout error) is returned
// immediately. If every attempt times out, or no attempt is made,
// ecode.TimeoutErr is returned.
func asyncCall(f func(request *request) ([]byte, error), req *request) ([]byte, error) {
	cfg := req.ps
	for attempt := 0; attempt < cfg.retry; attempt++ {
		// Send the HTTP request and wait for it to finish.
		data, err := f(req)

		// Anything that is not a timeout ends the retry loop.
		netErr, isNetErr := err.(net.Error)
		if !isNetErr || !netErr.Timeout() {
			return data, err
		}
	}
	return nil, ecode.TimeoutErr
}

服务设置超时

http.Server有两个用于设置超时的字段:

ReadTimeout

ReadTimeout的时间计算是从连接被接受(accept)到request body完全被读取(如果你不读取body，那么时间截止到读完header为止)

WriteTimeout

WriteTimeout的时间计算正常是从request header的读取结束开始，到response write结束为止 (也就是ServeHTTP方法的生命周期)

// Configure both server-side timeouts described above.
srv := &http.Server{  
    // Time allowed from accepting the connection until the request has been read.
    ReadTimeout: 5 * time.Second,
    // Time allowed from the end of the request header read until the response is written.
    WriteTimeout: 10 * time.Second,
}

// The returned error is discarded in this snippet.
srv.ListenAndServe()

net/http包还提供了TimeoutHandler返回了一个在给定的时间限制内运行的handler

func TimeoutHandler(h Handler, dt time.Duration, msg string) Handler

第一个参数是Handler，第二个参数是time.Duration（超时时间），第三个参数是string类型，当到达超时时间后返回的信息

// handler simulates a slow endpoint: it sleeps for three seconds, prints a
// marker line to standard output, and then writes "hello world" as the
// response body.
func handler(w http.ResponseWriter, r *http.Request) {
	const delay = 3 * time.Second
	time.Sleep(delay)
	fmt.Println("测试超时")

	body := []byte("hello world")
	w.Write(body)
}

// server starts an HTTP server on :8081 that serves handler wrapped in
// http.TimeoutHandler (5s limit, body "Timeout!\n") with a 1s WriteTimeout.
//
// ListenAndServe only returns on failure; the error is reported on stderr
// before the process exits with status 1, instead of being silently dropped.
//
// NOTE(review): handler sleeps 3s, which is longer than WriteTimeout and
// shorter than the TimeoutHandler limit. Presumably intentional for the
// demonstration; confirm.
func server() {
	srv := http.Server{
		Addr:         ":8081",
		WriteTimeout: 1 * time.Second,
		Handler:      http.TimeoutHandler(http.HandlerFunc(handler), 5*time.Second, "Timeout!\n"),
	}
	if err := srv.ListenAndServe(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

客户端设置超时

http.client

最简单的我们通过http.Client的Timeout字段，就可以实现客户端的超时控制

http.Client的超时是超时的高层实现，包含了从Dial到Response Body的整个请求流程。http.Client是一个结构体类型，它提供了一个time.Duration类型的Timeout字段，这个字段定义了从请求开始到响应消息体被完全接收的时间限制。

// httpClientTimeout demonstrates the client-wide timeout: the whole exchange
// with the local test server must finish within three seconds. It prints the
// response and the error.
func httpClientTimeout() {
	c := &http.Client{
		Timeout: 3 * time.Second,
	}

	// The URL needs an explicit scheme; a scheme-less "//host/path" is
	// rejected by the transport as an unsupported protocol scheme.
	resp, err := c.Get("http://127.0.0.1:8081/test")
	if err == nil {
		// Release the connection once the function returns.
		defer resp.Body.Close()
	}
	fmt.Println(resp)
	fmt.Println(err)
}

context

net/http中的request实现了context,所以我们可以借助于context本身的超时机制，实现http中request的超时处理

// contextTimeout demonstrates a per-request timeout driven by a context: the
// request is bound to a context that expires after three seconds. It prints
// the response and the error.
func contextTimeout() {
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	// The URL needs an explicit scheme; a scheme-less "//host/path" is
	// rejected by the transport as an unsupported protocol scheme.
	req, err := http.NewRequest("GET", "http://127.0.0.1:8081/test", nil)
	if err != nil {
		log.Fatal(err)
	}

	resp, err := http.DefaultClient.Do(req.WithContext(ctx))
	if err == nil {
		// Release the connection once the function returns.
		defer resp.Body.Close()
	}
	fmt.Println(resp)
	fmt.Println(err)
}

使用context的优点就是，当父context被取消时，子context就会层层退出。

http.Transport

通过Transport还可以进行一些更小维度的超时设置

net.Dialer.Timeout 限制建立TCP连接的时间
http.Transport.TLSHandshakeTimeout 限制 TLS握手的时间
http.Transport.ResponseHeaderTimeout 限制读取response header的时间
http.Transport.ExpectContinueTimeout 限制client在发送包含 Expect: 100-continue的header到收到继续发送body的response之间的时间等待。注意在1.6中设置这个值会禁用HTTP/2(DefaultTransport自1.6.2起是个特例)

// transportTimeout demonstrates a fine-grained timeout set on the transport:
// ResponseHeaderTimeout is set to three seconds. It prints the response and
// the error.
func transportTimeout() {
	transport := &http.Transport{
		DialContext:           (&net.Dialer{}).DialContext,
		ResponseHeaderTimeout: 3 * time.Second,
	}

	c := http.Client{Transport: transport}

	// The URL needs an explicit scheme; a scheme-less "//host/path" is
	// rejected by the transport as an unsupported protocol scheme.
	resp, err := c.Get("http://127.0.0.1:8081/test")
	if err == nil {
		// Release the connection once the function returns.
		defer resp.Body.Close()
	}
	fmt.Println(resp)
	fmt.Println(err)
}

问题

如果客户端在超时的临界点触发了超时机制，而这时候服务端刚好也接收到了http请求，

这种情况下服务端还是可以拿到请求的数据，所以对于超时时间的设置我们需要根据实际情况进行权衡，同时我们要考虑接口的幂等性。

总结

1、所有的超时实现都是基于Deadline，Deadline是一个时间的绝对值，一旦设置就永久生效，不管此时连接是否被使用以及怎么用，所以如果想使用SetDeadline建立超时机制，需要每次在Read/Write操作之前手动调用它。

2、使用context进行超时控制的好处就是，当父context超时的时候，子context就会层层退出。

参考

【[译]Go net/http 超时机制完全手册】https://colobu.com/2016/07/01/the-complete-guide-to-golang-net-http-timeouts/
【Go 语言 HTTP 请求超时入门】https://studygolang.com/articles/14405
【使用 timeout、deadline 和 context 取消参数使 Go net/http 服务更灵活】https://jishuin.proginn.com/p/763bfbd2fb6a

Tags: golang