人工智能:文本相似度分析
- 2019 年 10 月 10 日
- 筆記
通常我们会遇到这么一个问题:用户在评论、发文的时候,会时不时地发一些高度相似的内容,显然这样的重复内容没有任何意义。这时候我们就可以用一些算法来计算文本之间的相似度究竟是多少,并据此做出相应的处理策略。实现这个功能可以用多种语言来完成,你可以点我去获得其他语言的做法,这里先罗列一下如何用 Go 语言来实现这个功能:
// Command main demonstrates a call to the xiaocongjisuan text-similarity
// endpoint: it submits two pieces of text and prints the raw response body.
package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
	"os"
	"strings"
	"time"
)

// Replace these placeholders with the appKey and openId issued for your account.
const (
	APP_KEY = "yours"
	OPEN_ID = "yours"
)

// requestTimeout bounds each HTTP call made through httpClient.
const requestTimeout = 10 * time.Second

// httpClient is shared by get and post. http.DefaultClient has no timeout, so
// an unresponsive server would otherwise block the program indefinitely.
var httpClient = &http.Client{Timeout: requestTimeout}

// requestContent sends params to requestUrl and returns the raw response body.
//
// method is compared case-insensitively: "GET" issues a GET request with the
// parameters in the query string; every other value issues a form-encoded POST.
func requestContent(requestUrl string, params url.Values, method string) ([]byte, error) {
	if strings.EqualFold(method, http.MethodGet) {
		return get(requestUrl, params)
	}
	return post(requestUrl, params)
}

// buildGetURL returns requestUrl with params merged into its query string.
//
// Values are URL-encoded, so non-ASCII text such as Chinese is safe to pass.
// Query parameters already present in requestUrl are kept unless params
// supplies the same key, in which case the value from params wins.
func buildGetURL(requestUrl string, params url.Values) (string, error) {
	u, err := url.Parse(requestUrl)
	if err != nil {
		return "", fmt.Errorf("解析url错误: %w", err)
	}
	query := u.Query()
	for key, values := range params {
		query[key] = values
	}
	u.RawQuery = query.Encode()
	return u.String(), nil
}

// get performs a GET request with params in the query string and returns the
// response body. The body is returned whatever the HTTP status code is.
func get(requestUrl string, params url.Values) ([]byte, error) {
	target, err := buildGetURL(requestUrl, params)
	if err != nil {
		return nil, err
	}
	resp, err := httpClient.Get(target)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	return ioutil.ReadAll(resp.Body)
}

// post submits params as an application/x-www-form-urlencoded POST body and
// returns the response body. The body is returned whatever the HTTP status
// code is.
func post(requestUrl string, params url.Values) ([]byte, error) {
	resp, err := httpClient.PostForm(requestUrl, params)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	return ioutil.ReadAll(resp.Body)
}

func main() {
	const (
		domain  = "http://api.xiaocongjisuan.com/"
		servlet = "data/contentsimilarity/analysis"
		method  = "get"
	)
	endpoint := domain + servlet

	// Credentials sent with every request.
	params := url.Values{}
	params.Set("appKey", APP_KEY)
	params.Set("openId", OPEN_ID)

	// The two texts to compare; change these per call.
	params.Set("content1", "我是最可爱的小伙子")
	params.Set("content2", "我是最漂亮的小姑娘")

	data, err := requestContent(endpoint, params, method)
	if err != nil {
		// Check the error before touching data: data is nil when the request failed.
		fmt.Fprintf(os.Stderr, "请求失败: %v\n", err)
		os.Exit(1)
	}
	fmt.Println(string(data))
}
C# 语言的实现方式也非常简单,罗列在下面:
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample that calls the xiaocongjisuan text-similarity endpoint
    /// with two pieces of text and prints the raw response.
    /// </summary>
    class Program
    {
        // Replace these placeholders with the credentials issued for your account.
        private static string appKey = "yours";
        private static string openId = "yours";

        // Timeout, in milliseconds, applied to every request.
        private const int TimeoutMs = 5000;

        /// <summary>
        /// Reads the whole response body as text and closes the response.
        /// </summary>
        /// <param name="rsp">Response to read; it is closed before returning.</param>
        /// <param name="encoding">Encoding used to decode the body.</param>
        /// <returns>The decoded response body.</returns>
        static string getResponseAsString(HttpWebResponse rsp, Encoding encoding)
        {
            try
            {
                using (Stream stream = rsp.GetResponseStream())
                using (StreamReader reader = new StreamReader(stream, encoding))
                {
                    return reader.ReadToEnd();
                }
            }
            finally
            {
                if (rsp != null) rsp.Close();
            }
        }

        /// <summary>
        /// Maps a response charset name to an Encoding, falling back to UTF-8
        /// when the charset is missing or not recognised.
        /// </summary>
        static Encoding resolveEncoding(string charset)
        {
            if (string.IsNullOrEmpty(charset))
            {
                return Encoding.UTF8;
            }
            try
            {
                return Encoding.GetEncoding(charset);
            }
            catch (ArgumentException)
            {
                return Encoding.UTF8;
            }
        }

        /// <summary>
        /// URL-encodes one query component. "gb2312" and "utf8" select the
        /// corresponding encoding; any other value returns the text unchanged.
        /// </summary>
        static string encodeComponent(string text, string encode)
        {
            if (encode == "gb2312")
            {
                return System.Web.HttpUtility.UrlEncode(text, Encoding.GetEncoding("gb2312"));
            }
            if (encode == "utf8")
            {
                return System.Web.HttpUtility.UrlEncode(text, Encoding.UTF8);
            }
            return text;
        }

        /// <summary>
        /// Builds a "name=value&amp;name=value" query string.
        /// </summary>
        /// <param name="parameters">Parameters to serialise. Entries with an empty
        /// name are skipped; a null value is written as an empty string.</param>
        /// <param name="encode">"gb2312" or "utf8" to URL-encode names and values
        /// with that encoding; any other value leaves them unencoded.</param>
        /// <returns>The query string, without a leading '?'.</returns>
        internal static string buildQuery(IDictionary<string, object> parameters, string encode)
        {
            StringBuilder query = new StringBuilder();
            foreach (KeyValuePair<string, object> entry in parameters)
            {
                string name = entry.Key;
                if (string.IsNullOrEmpty(name))
                {
                    continue;
                }

                // Convert.ToString returns "" for null, so a null value no longer throws.
                string value = Convert.ToString(entry.Value);

                if (query.Length > 0)
                {
                    query.Append("&");
                }
                query.Append(encodeComponent(name, encode));
                query.Append("=");
                query.Append(encodeComponent(value, encode));
            }
            return query.ToString();
        }

        /// <summary>
        /// Sends the parameters to the given URL and returns the response body.
        /// </summary>
        /// <param name="url">Request address.</param>
        /// <param name="parameters">Request parameters.</param>
        /// <param name="method">"post" (case-insensitive) sends a form-encoded POST;
        /// any other value sends a GET with the parameters in the query string.</param>
        /// <returns>
        /// The response body. For POST, a failure is reported by returning the
        /// exception message instead of throwing (kept for backward compatibility);
        /// for GET, exceptions propagate to the caller.
        /// </returns>
        static string requestContent(string url, IDictionary<string, object> parameters, string method)
        {
            if (string.Equals(method, "post", StringComparison.OrdinalIgnoreCase))
            {
                try
                {
                    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
                    // Send the canonical upper-case verb regardless of how the caller spelled it.
                    req.Method = "POST";
                    req.KeepAlive = false;
                    req.ProtocolVersion = HttpVersion.Version10;
                    req.Timeout = TimeoutMs;
                    req.ContentType = "application/x-www-form-urlencoded;charset=utf-8";

                    byte[] postData = Encoding.UTF8.GetBytes(buildQuery(parameters, "utf8"));
                    // Close the request stream before asking for the response.
                    using (Stream reqStream = req.GetRequestStream())
                    {
                        reqStream.Write(postData, 0, postData.Length);
                    }

                    HttpWebResponse rsp = (HttpWebResponse)req.GetResponse();
                    // getResponseAsString closes rsp.
                    return getResponseAsString(rsp, resolveEncoding(rsp.CharacterSet));
                }
                catch (Exception ex)
                {
                    // NOTE(review): callers cannot tell this apart from a real response body.
                    return ex.Message;
                }
            }

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url + "?" + buildQuery(parameters, "utf8"));
            request.Method = "GET";
            request.Timeout = TimeoutMs;
            request.ReadWriteTimeout = TimeoutMs;
            request.ContentType = "text/html;charset=UTF-8";

            HttpWebResponse response = (HttpWebResponse)request.GetResponse();
            // getResponseAsString disposes the stream and reader and closes the response.
            return getResponseAsString(response, Encoding.GetEncoding("utf-8"));
        }

        static void Main(string[] args)
        {
            string domain = "http://api.xiaocongjisuan.com/";
            string servlet = "data/contentsimilarity/analysis";
            string method = "get";
            string url = domain + servlet;

            var parameters = new Dictionary<string, object>();
            parameters.Add("appKey", appKey);
            parameters.Add("openId", openId);

            // The two texts to compare; change these per call.
            parameters.Add("content1", "我是最可爱的小伙子");
            parameters.Add("content2", "我是最漂亮的小姑娘");

            string result = requestContent(url, parameters, method);
            Console.WriteLine(result);
            // Keep the console window open until a key is pressed.
            Console.Read();
        }
    }
}