人工智能:文本相似度分析

  • 2019 年 10 月 10 日
  • 筆記

我们经常会遇到这样一个问题:用户在评论、发文的时候,会时不时地发布一些高度相似的内容,这些重复内容显然没有任何意义。这时我们可以用一些算法来计算两段文本的相似度,并据此制定相应的处理策略。这个功能可以用多种语言来实现,你可以点我去获得其他语言的做法,这里先列出如何用 Go 语言来实现:

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
	"strings"
	"time"
)

// Replace these with the appKey and openId issued for your account.
const APP_KEY = "yours"
const OPEN_ID = "yours"

// httpClient is shared by get and post. The default http client has no
// timeout, so a stalled server would block the program forever; five seconds
// matches the timeout used by the C# version of this sample.
var httpClient = &http.Client{Timeout: 5 * time.Second}

// requestContent sends params to requestUrl and returns the raw response body.
// method is compared case-insensitively: "GET" sends the parameters in the
// query string, any other value sends them as a form-encoded POST body.
func requestContent(requestUrl string, params url.Values, method string) (rs []byte, err error) {
	if strings.ToUpper(method) == "GET" {
		return get(requestUrl, params)
	}
	return post(requestUrl, params)
}

// get issues a GET request with params as the query string and returns the
// response body. Errors are returned to the caller rather than printed here,
// so each failure is reported exactly once.
func get(requestUrl string, params url.Values) (rs []byte, err error) {
	target, err := url.Parse(requestUrl)
	if err != nil {
		return nil, fmt.Errorf("解析url错误: %v", err)
	}
	// Encode percent-escapes the values, so non-ASCII (e.g. Chinese)
	// parameters are safe to place in the URL.
	target.RawQuery = params.Encode()

	resp, err := httpClient.Get(target.String())
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	return ioutil.ReadAll(resp.Body)
}

// post issues a POST request with params as an
// application/x-www-form-urlencoded body and returns the response body.
func post(requestUrl string, params url.Values) (rs []byte, err error) {
	resp, err := httpClient.PostForm(requestUrl, params)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	return ioutil.ReadAll(resp.Body)
}

func main() {
	domain := "http://api.xiaocongjisuan.com/"
	servlet := "data/contentsimilarity/analysis"
	method := "get"
	requestUrl := domain + servlet

	// Credentials that accompany every call.
	params := url.Values{}
	params.Set("appKey", APP_KEY)
	params.Set("openId", OPEN_ID)

	// The two texts whose similarity is to be analysed.
	params.Set("content1", "我是最可爱的小伙子")
	params.Set("content2", "我是最漂亮的小姑娘")

	// Check the error before touching the body: on failure data is nil and
	// printing it would only emit an empty line ahead of the real message.
	data, err := requestContent(requestUrl, params, method)
	if err != nil {
		fmt.Printf("请求失败: %v\n", err)
		return
	}
	fmt.Println(string(data))
}

C# 语言的实现方式也非常简单,代码如下:

using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample that sends two texts to the content-similarity API and
    /// prints the raw response.
    /// </summary>
    class Program
    {
        // Replace these with the appKey and openId issued for your account.
        private static string appKey = "yours";
        private static string openId = "yours";

        /// <summary>
        /// Reads the whole body of <paramref name="rsp"/> as text and releases
        /// the response, its stream and the reader.
        /// </summary>
        /// <param name="rsp">Response to read; it is closed before returning.</param>
        /// <param name="encoding">Encoding used to decode the body.</param>
        /// <returns>The decoded response body.</returns>
        static string getResponseAsString(HttpWebResponse rsp, Encoding encoding)
        {
            using (rsp)
            using (Stream stream = rsp.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream, encoding))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Maps a charset name taken from a response to an
        /// <see cref="Encoding"/>, falling back to UTF-8 when the name is
        /// missing or not recognised.
        /// </summary>
        /// <param name="charset">Charset name; may be null or empty.</param>
        /// <returns>The matching encoding, or UTF-8 as the fallback.</returns>
        static Encoding resolveEncoding(string charset)
        {
            if (string.IsNullOrEmpty(charset))
            {
                return Encoding.UTF8;
            }

            try
            {
                return Encoding.GetEncoding(charset);
            }
            catch (ArgumentException)
            {
                // Encoding.GetEncoding rejects names it does not know.
                return Encoding.UTF8;
            }
        }

        /// <summary>
        /// URL-encodes <paramref name="text"/> according to the
        /// <paramref name="encode"/> selector.
        /// </summary>
        /// <param name="text">Text to encode.</param>
        /// <param name="encode">"gb2312" or "utf8"; any other value leaves the text untouched.</param>
        /// <returns>The encoded text, or the input itself for an unknown selector.</returns>
        static string encodeComponent(string text, string encode)
        {
            if (encode == "gb2312")
            {
                return System.Web.HttpUtility.UrlEncode(text, Encoding.GetEncoding("gb2312"));
            }

            if (encode == "utf8")
            {
                return System.Web.HttpUtility.UrlEncode(text, Encoding.UTF8);
            }

            return text;
        }

        /// <summary>
        /// Builds a <c>name=value&amp;name=value</c> query string from
        /// <paramref name="parameters"/>.
        /// </summary>
        /// <param name="parameters">Request parameters; null yields an empty string.</param>
        /// <param name="encode">"gb2312" or "utf8" to URL-encode names and values; anything else appends them raw.</param>
        /// <returns>The query string without a leading '?'.</returns>
        static string buildQuery(IDictionary<string, object> parameters, string encode)
        {
            StringBuilder query = new StringBuilder();
            if (parameters == null)
            {
                return query.ToString();
            }

            foreach (KeyValuePair<string, object> pair in parameters)
            {
                // Only entries with an empty name are skipped; entries with an
                // empty value are still sent as "name=".
                if (string.IsNullOrEmpty(pair.Key))
                {
                    continue;
                }

                // A null value is sent as an empty string instead of throwing.
                string value = pair.Value == null ? string.Empty : pair.Value.ToString();

                if (query.Length > 0)
                {
                    query.Append('&');
                }

                // Names are encoded as well so that reserved characters in a
                // key cannot corrupt the query string.
                query.Append(encodeComponent(pair.Key, encode));
                query.Append('=');
                query.Append(encodeComponent(value, encode));
            }

            return query.ToString();
        }

        /// <summary>
        /// Sends <paramref name="parameters"/> to <paramref name="url"/> and
        /// returns the response body as text.
        /// </summary>
        /// <param name="url">Request address.</param>
        /// <param name="parameters">Request parameters.</param>
        /// <param name="method">"post" (case-insensitive) for a form POST; any other value issues a GET.</param>
        /// <returns>
        /// The response body. On the POST path a failure is reported by
        /// returning the exception message instead of throwing; on the GET
        /// path exceptions propagate to the caller.
        /// </returns>
        static string requestContent(string url, IDictionary<string, object> parameters, string method)
        {
            if (string.Equals(method, "post", StringComparison.OrdinalIgnoreCase))
            {
                try
                {
                    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
                    // Send the canonical upper-case verb regardless of how the
                    // caller spelled it.
                    req.Method = "POST";
                    req.KeepAlive = false;
                    req.ProtocolVersion = HttpVersion.Version10;
                    req.Timeout = 5000;
                    req.ContentType = "application/x-www-form-urlencoded;charset=utf-8";

                    byte[] postData = Encoding.UTF8.GetBytes(buildQuery(parameters, "utf8"));
                    // The request stream is closed before the response is requested.
                    using (Stream reqStream = req.GetRequestStream())
                    {
                        reqStream.Write(postData, 0, postData.Length);
                    }

                    HttpWebResponse rsp = (HttpWebResponse)req.GetResponse();
                    // getResponseAsString closes rsp once the body is read.
                    return getResponseAsString(rsp, resolveEncoding(rsp.CharacterSet));
                }
                catch (Exception ex)
                {
                    // Kept from the original sample: the caller receives the
                    // failure text in place of the response body.
                    return ex.Message;
                }
            }

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url + "?" + buildQuery(parameters, "utf8"));
            request.Method = "GET";
            // Same overall limit as the POST path; the default is far longer.
            request.Timeout = 5000;
            request.ReadWriteTimeout = 5000;
            request.ContentType = "text/html;charset=UTF-8";

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body, Encoding.GetEncoding("utf-8")))
            {
                return reader.ReadToEnd();
            }
        }

        static void Main(string[] args)
        {
            string domain = "http://api.xiaocongjisuan.com/";
            string servlet = "data/contentsimilarity/analysis";
            string method = "get";
            string url = domain + servlet;

            var parameters = new Dictionary<string, object>();

            // Credentials that accompany every call.
            parameters.Add("appKey", appKey);
            parameters.Add("openId", openId);

            // The two texts whose similarity is to be analysed.
            parameters.Add("content1", "我是最可爱的小伙子");
            parameters.Add("content2", "我是最漂亮的小姑娘");

            string result = requestContent(url, parameters, method);
            Console.WriteLine(result);

            // Keep the console window open until a key is pressed.
            Console.Read();
        }
    }
}