body-parser 源碼分析

2021 年 1 月 4 日
筆記
node

body-parser 源碼分析

預備知識：熟悉 express 的中間件邏輯
閱讀時間：30min

1. body-parser 解決什麼問題

在 node http 模組中，您只能通過 data 事件，以 buffer 的形式來獲取請求體內容，node 沒有提供如何解析請求body的API，body-parser 提供了這個功能。

body-parser 本質是一個處理請求 body 的中間件函數，他負責按照您給的規則解析請求body，並且將結果賦值到 req.body 屬性上。

2. 簡單的使用 body-parser

const express = require('express')
const bodyParser = require('body-parser')

const app = express()

// Register the parsers first so req.body is populated before the handler runs:
// application/x-www-form-urlencoded, then application/json.
app.use(bodyParser.urlencoded({ extended: false }))
app.use(bodyParser.json())

// Echo whatever the parsers stored on req.body back to the client.
app.use((req, res) => {
  const echoed = JSON.stringify(req.body, null, 2)

  res.setHeader('Content-Type', 'text/plain')
  res.write('you posted:\n')
  res.end(echoed)
})

通過這個例子您可以了解到如何簡單的使用 body-parser。

3. 源碼分析

首先 bodyParser 的源碼結構如下：

index.js：入口文件
lib：核心方法
- types：該目錄下的4個文件，分別用於解析對應的4種類型
  - json.js：將body解析為JSON對象
  - raw.js：將body解析為Buffer
  - text.js：將body解析為字元串
  - urlencoded.js：將表單數據(urlencoded編碼)解析為JSON對象
- read.js：讀取 body 內容

1. bodyParser的導出形式

bodyParser 的定義在 index.js，這裡的邏輯非常清晰：

創建一個用於解析 json 和 urlencoded 格式的中間件：bodyParser 並導出
給 bodyParser 添加 json/text/raw/urlencoded 方法

'use strict'

var deprecate = require('depd')('body-parser')

// Cache of parser modules that have already been loaded, keyed by parser name.
var parsers = Object.create(null)

// The module itself is exported as a (deprecated) function.
exports = module.exports = deprecate.function(bodyParser,
  'bodyParser: use individual json/urlencoded middlewares')

// Expose the JSON, raw, text and URL-encoded parsers as getters on the export,
// so each parser module is only loaded when its property is first read.
;['json', 'raw', 'text', 'urlencoded'].forEach(function (parserName) {
  Object.defineProperty(exports, parserName, {
    configurable: true,
    enumerable: true,
    get: createParserGetter(parserName)
  })
})

/**
 * Create a middleware that parses both json and urlencoded bodies.
 *
 * Every enumerable option except `type` is copied into a fresh object that is
 * handed to both underlying parser factories.
 *
 * @param {object} [options] options forwarded to the json and urlencoded parsers
 * @return {function} middleware that runs the json parser, then the urlencoded parser
 */
function bodyParser (options) {
  var shared = {}
  var key

  // copy everything but the "type" option
  if (options) {
    for (key in options) {
      if (key === 'type') continue
      shared[key] = options[key]
    }
  }

  var parseUrlencoded = exports.urlencoded(shared)
  var parseJson = exports.json(shared)

  return function bodyParser (req, res, next) {
    parseJson(req, res, function (err) {
      // a json failure short-circuits the chain
      if (err) {
        return next(err)
      }

      parseUrlencoded(req, res, next)
    })
  }
}

/**
 * Build a property getter that loads the named parser on access.
 *
 * @param {string} name parser name passed to loadParser
 * @return {function} getter returning whatever loadParser(name) returns
 */
function createParserGetter (name) {
  function get () {
    return loadParser(name)
  }

  return get
}

// Load a parser module.
function loadParser (parserName) {
  var parser = parsers[parserName]

  if (parser !== undefined) {
    return parser
  }

  // this uses a switch for static require analysis
  switch (parserName) {
    case 'json':
      parser = require('./lib/types/json')
      break
    case 'raw':
      parser = require('./lib/types/raw')
      break
    case 'text':
      parser = require('./lib/types/text')
      break
    case 'urlencoded':
      parser = require('./lib/types/urlencoded')
      break
  }

  // store to prevent invoking require()
  return (parsers[parserName] = parser)
}

4. text 解析流程

將 body 解析為字元串非常簡單，只需要將 buffer 轉換為 string 即可。所以我們從最簡單的 text parser 開始，其他解析器大體也是類似的，主要區別在於將字元串解析為特定格式的方法，比如將表單數據(urlencoded form)解析為JSON對象。

現在您希望將 text/plain 的請求體解析為一個字元串，源碼是這樣的：

// By default, bodies sent as text/plain are parsed into a string
const express = require('express')
const bodyParser = require('body-parser')

const port = 3000
const app = express()

app.use(bodyParser.text())

// Echo the parsed body back to the client
app.post('/text', function (req, res) {
  return res.send(req.body)
})

app.listen(port, function () {
  console.log(`\nExample app listening on port ${port}!`)
})

當我們用 curl 進行如下訪問操作：

$ curl -H "Content-Type: text/plain" -d "hello" http://localhost:3000/text
hello

這背後的流程是怎樣的呢？

1. bodyParser.text() 中間件

由於我們使用 bodyParser.text() 中間件，所以當進行上述訪問時，會訪問到 lib/types/text，源碼如下：

'use strict'

var bytes = require('bytes')
var contentType = require('content-type')
var debug = require('debug')('body-parser:text')
var read = require('../read')
var typeis = require('type-is')

// Export the text middleware factory
module.exports = text

/**
 * Create a middleware that hands request bodies to read() and stores the
 * result, unchanged by the parse step, on req.body.
 *
 * @param {object} [options]
 * @param {string} [options.defaultCharset='utf-8'] charset used when the request gives none
 * @param {boolean} [options.inflate=true] forwarded to read() as its `inflate` option
 * @param {number|string} [options.limit='100kb'] body size limit; non-numbers go through bytes.parse
 * @param {string|function} [options.type='text/plain'] content type to handle, or a custom predicate
 * @param {function} [options.verify] hook forwarded to read() as its `verify` option
 * @return {function} the textParser middleware
 * @throws {TypeError} when `verify` is provided but is not a function
 */
function text (options) {
  var settings = options || {}

  // charset to fall back on when getCharset() yields nothing
  var fallbackCharset = settings.defaultCharset || 'utf-8'

  // anything other than an explicit `false` enables inflation
  var allowInflate = settings.inflate !== false

  // numeric limits are taken as-is, everything else is parsed by bytes
  var maxSize
  if (typeof settings.limit === 'number') {
    maxSize = settings.limit
  } else {
    maxSize = bytes.parse(settings.limit || '100kb')
  }

  // content type this middleware is responsible for
  var mediaType = settings.type || 'text/plain'

  // optional user hook, forwarded to read()
  var verifier = settings.verify || false
  if (verifier !== false && typeof verifier !== 'function') {
    throw new TypeError('option verify must be function')
  }

  // a user-supplied predicate is used directly, otherwise build one
  var shouldParse = typeof mediaType === 'function'
    ? mediaType
    : typeChecker(mediaType)

  // This is the parser-specific step; the text parser returns its input as-is.
  function parse (buf) {
    return buf
  }

  // Log why the request is left alone and pass control on.
  function skip (reason, next) {
    debug(reason)
    next()
  }

  return function textParser (req, res, next) {
    // read() sets req._body once it has taken over a request
    if (req._body) {
      return skip('body already parsed', next)
    }

    req.body = req.body || {}

    // nothing to do for requests without a body
    if (!typeis.hasBody(req)) {
      return skip('skip empty body', next)
    }

    debug('content-type %j', req.headers['content-type'])

    // leave content types this parser is not responsible for untouched
    if (!shouldParse(req)) {
      return skip('skip parsing', next)
    }

    var readOptions = {
      encoding: getCharset(req) || fallbackCharset,
      inflate: allowInflate,
      limit: maxSize,
      verify: verifier
    }

    read(req, res, next, parse, debug, readOptions)
  }
}

/**
 * Get the charset of a request.
 *
 * @param {object} req request handed to contentType.parse
 * @return {string|undefined} lower-cased charset ('' when the parameter is
 *   absent), or undefined when it cannot be extracted
 */
function getCharset (req) {
  var charset

  try {
    var parsed = contentType.parse(req)
    charset = (parsed.parameters.charset || '').toLowerCase()
  } catch (e) {
    // any failure while extracting the charset counts as "no charset"
    charset = undefined
  }

  return charset
}

/**
 * Build a content-type predicate for the given type.
 *
 * @param {string} type type passed to typeis as its second argument
 * @return {function} checkType(req) returning typeis(req, type) coerced to a boolean
 */
function typeChecker (type) {
  function checkType (req) {
    var matched = typeis(req, type)
    return !!matched
  }

  return checkType
}

/**
 * Check whether a request carries a body (the article notes this function was
 * copied from the type-is package).
 *
 * @param {object} req request exposing a `headers` object
 * @return {boolean} true when a transfer-encoding header is present or the
 *   content-length header converts to a number
 */
function hasbody (req) {
  var headers = req.headers

  if (headers['transfer-encoding'] !== undefined) {
    return true
  }

  // the coercing global isNaN is intentional: a missing header (undefined)
  // counts as "not a number", while numeric strings count as numbers
  return !isNaN(headers['content-length'])
}

大概流程如下：

使用 app.use 使用中間件
客戶端發起 POST 請求
進入 textParser 中間件
- 判斷是否已經解析過(req._body = true)
- 判斷請求是否包含請求體
- 判斷請求體類型是否需要處理
- 讀取請求體，解析並設置 req.body && req._body = true
進入 read 中間件(讀取請求體，解析並設置 req.body && req._body = true)

2. read() 中間件(lib/read.js)

lib/types 下的4個文件，最終都會訪問 lib/read.js，形式如下：

// Call shape shared by the parsers: the request/response/next triple, the
// parser-specific parse function, the debug logger and the read options.
read(req, res, next, parse, debug, {
  encoding: charset,
  inflate: inflate,
  limit: limit,
  verify: verify
})

現在我們來看下 lib/read.js 源碼：

'use strict'

var createError = require('http-errors')
var getBody = require('raw-body')
var iconv = require('iconv-lite')
var onFinished = require('on-finished')
var zlib = require('zlib')

// Export the body reader
module.exports = read

/**
 * Read a request body, optionally verify it, parse it and store the result
 * on req.body.
 *
 * Note that the passed options object is mutated: `length` and `encoding`
 * are overwritten before it is handed to getBody.
 *
 * @param {object} req request; flagged with `_body = true` and given `body`
 * @param {object} res response; only forwarded to the verify hook
 * @param {function} next called with an error on failure, without arguments on success
 * @param {function} parse parser-specific function turning the body into req.body
 * @param {function} debug logger
 * @param {object} options `encoding`, `inflate` and `verify` are read here; the
 *   whole object is passed on to getBody
 */
function read (req, res, next, parse, debug, options) {
  var length
  var opts = options
  var stream

  // flag the request as parsed; the parser middleware checks this flag and
  // skips requests that already carry it
  req._body = true

  // read options
  var encoding = opts.encoding !== null
    ? opts.encoding
    : null
  var verify = opts.verify

  try {
    // get the content stream; contentstream throws for unsupported encodings
    stream = contentstream(req, debug, opts.inflate)
    // take the length off the stream and clear it there
    // NOTE(review): presumably so the property does not linger on the request — confirm
    length = stream.length
    stream.length = undefined
  } catch (err) {
    return next(err)
  }

  // set raw-body options
  // with a verify hook the encoding handed to getBody is null; the body is
  // decoded further down instead, after verify has run
  opts.length = length
  opts.encoding = verify
    ? null
    : encoding

  // assert charset is supported
  if (opts.encoding === null && encoding !== null && !iconv.encodingExists(encoding)) {
    return next(createError(415, 'unsupported charset "' + encoding.toUpperCase() + '"', {
      charset: encoding.toLowerCase(),
      type: 'charset.unsupported'
    }))
  }

  // read body
  debug('read body')
  // getBody reads the content of the stream and reports it to the callback
  getBody(stream, opts, function (error, body) {
    if (error) {
      // error handling
      var _error
      if (error.type === 'encoding.unsupported') {
        // echo back charset
        _error = createError(415, 'unsupported charset "' + encoding.toUpperCase() + '"', {
          charset: encoding.toLowerCase(),
          type: 'charset.unsupported'
        })
      } else {
        // set status code on error
        _error = createError(400, error)
      }

      // read off entire request, and only report the error once it has finished
      stream.resume()
      onFinished(req, function onfinished () {
        next(createError(400, _error))
      })
      return
    }

    // user-supplied verify hook; a throw is reported as a 403
    if (verify) {
      try {
        debug('verify body')
        verify(req, res, body, encoding)
      } catch (err) {
        next(createError(403, err, {
          body: body,
          type: err.type || 'entity.verify.failed'
        }))
        return
      }
    }

    var str = body
    try {
      debug('parse body')
      // if the body is not a string yet and an encoding is set, decode it now
      str = typeof body !== 'string' && encoding !== null
        ? iconv.decode(body, encoding)
        : body

      // each parser passes in its own parse function
      req.body = parse(str)
    } catch (err) {
      next(createError(400, err, {
        body: str,
        type: err.type || 'entity.parse.failed'
      }))
      return
    }

    next()
  })
}

/**
 * Get the stream the request body should be read from.
 *
 * 1. Look at the content-encoding header; compressed bodies are piped
 *    through the matching zlib decoder.
 * 2. Return the stream to read from (the request itself for 'identity').
 *
 * @param {object} req request; for 'identity' it is returned with `length`
 *   set to the content-length header
 * @param {function} debug logger
 * @param {boolean} inflate when false, any encoding other than 'identity' is rejected
 * @return {object} the request or the zlib stream it is piped into
 * @throws error created via createError(415, ...) with type
 *   'encoding.unsupported' for a rejected or unknown encoding
 */
function contentstream (req, debug, inflate) {
  var headers = req.headers
  var coding = (headers['content-encoding'] || 'identity').toLowerCase()
  var declaredLength = headers['content-length']

  debug('content-encoding "%s"', coding)

  if (coding !== 'identity' && inflate === false) {
    throw createError(415, 'content encoding unsupported', {
      encoding: coding,
      type: 'encoding.unsupported'
    })
  }

  // uncompressed: read from the request directly
  if (coding === 'identity') {
    req.length = declaredLength
    return req
  }

  var decoder
  if (coding === 'deflate') {
    decoder = zlib.createInflate()
    debug('inflate body')
  } else if (coding === 'gzip') {
    decoder = zlib.createGunzip()
    debug('gunzip body')
  } else {
    throw createError(415, 'unsupported content encoding "' + coding + '"', {
      encoding: coding,
      type: 'encoding.unsupported'
    })
  }

  req.pipe(decoder)
  return decoder
}

5. 一些疑問

1. 為什麼要對 charset 進行處理

其實本質上來說，前端的 charset 一般都固定為 utf-8，甚至在 jQuery 的 AJAX 請求中，請求的 charset 是不可更改的，只能是 utf-8。但是在使用 fetch 等 API 的時候，的確是可以更改 charset 的，所以 body-parser 做這項處理，是為了滿足一些比較少見的更改 charset 的需求。

2. 為什麼要對 content-encoding 做處理

一般情況下我們認為，考慮到前端發的AJAX之類的請求的數據量，是不需要做Gzip壓縮的。但是向伺服器發起請求的不一定只有前端，還可能是Node的客戶端。這些Node客戶端可能會向Node服務端傳送壓縮過後的數據流。例如下面的程式碼所示:

const zlib = require('zlib');
const request = require('request');

// Compress the payload up front so the server has to inflate it.
const data = zlib.gzipSync(Buffer.from("我是一個被Gzip壓縮後的數據"));

request({
    method: 'POST',
    // request needs a fully qualified URI, protocol included
    url: 'http://127.0.0.1:3000/post',
    headers: { // request headers
        "Content-Type": "text/plain",
        // tells the server the body is gzip-compressed
        "Content-Encoding": "gzip"
    },
    body: data
})

6. 參考以及延伸

npm bodyParser https://www.npmjs.com/package/body-parser#bodyparsertextoptions
npm iconv-lite 純JS編碼轉換器
npm raw-body 以buffer或者string的方式獲取一個可讀流的全部內容，並且可校驗長度
bodyparser實現原理解析(這篇文章回答了我上述2個疑問) https://zhuanlan.zhihu.com/p/78482006

Tags: node