用golang在服务端将html转为word(docx)尝试

  • 2019 年 12 月 20 日
  • 笔记

对于存储在数据库表中的html内容,在页面上解析有很多前端方案支持,小程序端也有专门的解析组件。可要导出到word,不用前端该怎么弄?

我用goquery+github.com/unidoc/unioffice来尝试一下。

将engineercms里的日志尝试导出来。

设代日志在线查阅:

和导出的word格式对比一下吧:

思路是这样的:先取出内容,用goquery先分出<p>标签里的段落,一方面提取txt,另一方面提取html,后者含有<img>标签里的src图片地址。

然后循环段落,在每段后面插入img,只能用这样的笨办法了。要想把图片完全嵌入到段落中间,就麻烦点了。

type DiaryContent struct {  	Txt  string  	Html string  }    func (c *DiaryController) HtmlToDoc() {  	id := beego.AppConfig.String("wxdiaryprojectid") //"26159" //25002珠三角设代日记id26159    	// limit := "10"  	limit := c.Input().Get("limit")  	limit1, err := strconv.Atoi(limit)  	if err != nil {  		beego.Error(err)  	}  	page := c.Input().Get("page")    	page1, err := strconv.Atoi(page)  	if err != nil {  		beego.Error(err)  	}    	var idNum int64  	//id转成64为  	idNum, err = strconv.ParseInt(id, 10, 64)  	if err != nil {  		beego.Error(err)  	}    	var offset int  	if page1 <= 1 {  		offset = 0  	} else {  		offset = (page1 - 1) * limit1  	}    	// diaries, err := models.GetWxDiaries(idNum, limit1, offset)  	diaries, err := models.GetWxDiaries2(idNum, limit1, offset)  	if err != nil {  		beego.Error(err)  	}    	doc := document.New()    	for _, v := range diaries {  		did := v.Diary.Id  		// wxsite := beego.AppConfig.String("wxreqeustsite")    		Diary, err := models.GetDiary(did)  		if err != nil {  			beego.Error(err)  		}  		para := doc.AddParagraph()  		run := para.AddRun()  		para.SetStyle("Title")  		run.AddText(Diary.Title)    		para = doc.AddParagraph()  		para.SetStyle("Heading1")  		run = para.AddRun()  		run.AddText(Diary.Diarydate)    		//将一篇日志分段,通过<p标签  		slice1 := make([]DiaryContent, 0)    		var r io.Reader = strings.NewReader(string(Diary.Content))  		goquerydoc, err := goquery.NewDocumentFromReader(r)  		if err != nil {  			beego.Error(err)  		}    		goquerydoc.Find("p").Each(func(i int, s *goquery.Selection) {  			sel, _ := s.Html()  			bb := make([]DiaryContent, 1)  			bb[0].Html = sel  			txt := s.Text()  			bb[0].Txt = txt  			slice1 = append(slice1, bb...)  		
})    		for _, w := range slice1 {  			//在每段里查找img标签  			// beego.Info(w)  			var r2 io.Reader = strings.NewReader(w.Html)  			goquerydoc2, err := goquery.NewDocumentFromReader(r2)  			if err != nil {  				beego.Error(err)  			}  			slice2 := make([]Img, 0)  			goquerydoc2.Find("img").Each(func(i int, s2 *goquery.Selection) {  				sel2, _ := s2.Attr("src")  				// beego.Info(sel2)  				aa := make([]Img, 1)  				sel3 := strings.Replace(sel2, "/attachment/", "attachment/", -1)  				aa[0].Src = sel3  				aa[0].Name = path.Base(sel2)  				slice2 = append(slice2, aa...)  			})    			para = doc.AddParagraph()  			para.Properties().SetFirstLineIndent(0.354331 * measurement.Inch)  			run = para.AddRun()  			run.AddText(w.Txt)    			if len(slice2) > 0 {  				for _, x := range slice2 {  					img1, err := common.ImageFromFile(x.Src)  					if err != nil {  						log.Fatalf("unable to create image: %s", err)  					}  					img1ref, err := doc.AddImage(img1)  					if err != nil {  						log.Fatalf("unable to add image to document: %s", err)  					}  					para = doc.AddParagraph()  					run = para.AddRun()    					inl, err := run.AddDrawingInline(img1ref)  					if err != nil {  						log.Fatalf("unable to add inline image: %s", err)  					}  					inl.SetSize(5.5*measurement.Inch, 5.5*measurement.Inch)  				}  			}  		}    	}  	newname := strconv.FormatInt(time.Now().UnixNano(), 10) + ".docx"  	doc.SaveToFile("static/" + newname)  	c.Data["json"] = map[string]interface{}{"info": "SUCCESS", "filename": newname}  	c.ServeJSON()  }