Elasticsearch第五篇:PlainElastic.Net 操作 Elasticsearch
- 2020 年 8 月 14 日
- 筆記
- elasticsearch
再次強調,我安裝的Elasticsearch 版本是 7.8.0 ,C# 操作 Elasticsearch 的驅動有 NEST、Elasticsearch.net 、PlainElastic.Net 等,當然要注意版本是否匹配,
PlainElastic.Net 是比較舊的操作 Elasticsearch 的方式 ,但是看到不少園友用這個,所以本篇也用 PlainElastic.Net ,稍後有時間會給出 NEST 、 Elasticsearch.net 的 Demo
PlainElastic.Net 的參考文檔、使用方法可以看 https://github.com/Yegoroff/PlainElastic.Net
完整的 Demo 見 https://github.com/fhrddx/ES_Query ,前端 UI 框架是 aceadmin ,見 http://ace.jeka.by/
先看一下效果圖
1、首先是封裝 ElasticSearchHelper
/// <summary>
/// Singleton helper that owns the single <see cref="ElasticConnection"/> used to talk to
/// the Elasticsearch node at localhost:9200.
/// </summary>
public class ElasticSearchHelper
{
    // NOTE: "Intance" is a misspelling of "Instance"; the name is kept as-is because
    // callers reference it.
    public static readonly ElasticSearchHelper Intance = new ElasticSearchHelper();

    // Assigned exactly once in the constructor, so it is marked readonly.
    private readonly ElasticConnection Client;

    // Private constructor: the only instance is the static one above.
    private ElasticSearchHelper()
    {
        Client = new ElasticConnection("localhost", 9200);
    }
}
2、在 ElasticSearchHelper 設定字段mapping 並生成索引庫 db_student_test1
/// <summary>
/// Creates the index "db_student_test1" with an explicit mapping for the student fields.
/// </summary>
/// <returns>
/// The "acknowledged" flag from the parsed response, or false when the response could not
/// be parsed into a result object.
/// </returns>
public bool BuildStudentMapping()
{
    var serializer = new JsonNetSerializer();

    // Field definitions shared by several properties of the mapping.
    var ikText = new { type = "text", analyzer = "ik_max_word" };
    var integerField = new { type = "integer" };

    var indexDefinition = new
    {
        mappings = new
        {
            properties = new
            {
                name = new { type = "text", analyzer = "standard" },
                school = ikText,
                desc = ikText,
                @class = integerField,
                chinese = integerField,
                english = integerField,
                math = integerField
            }
        }
    };

    string requestBody = serializer.Serialize(indexDefinition);
    OperationResult response = Client.Put("db_student_test1", requestBody);
    CommandResult parsed = serializer.ToCommandResult(response.Result);

    // A null result object is treated as "not acknowledged".
    return parsed?.acknowledged ?? false;
}
3、在 ElasticSearchHelper中,給與索引庫 db_student_test1 一個別名 student_test1
/// <summary>
/// Registers the alias "student_test1" for the index "db_student_test1".
/// </summary>
/// <returns>
/// The "acknowledged" flag from the parsed response, or false when the response could not
/// be parsed into a result object.
/// </returns>
public bool Alias()
{
    OperationResult response = Client.Put("db_student_test1/_alias/student_test1");
    CommandResult parsed = new JsonNetSerializer().ToCommandResult(response.Result);

    // A null result object is treated as "not acknowledged".
    return parsed?.acknowledged ?? false;
}
4、創建索引文檔
/// <summary>
/// Indexes a document that is already serialized as JSON.
/// </summary>
/// <param name="indexName">Name of the target index.</param>
/// <param name="id">Document id.</param>
/// <param name="jsonDocument">Document body as a JSON string.</param>
/// <returns>The parsed index response.</returns>
public IndexResult CreateIndex(string indexName, string id, string jsonDocument)
{
    // The target cluster is Elasticsearch 7.8.0, where the only usable type name is the
    // default "_doc".
    string indexUrl = new IndexCommand(indexName, "_doc", id);

    // Timeout is set on the shared connection before every write.
    Client.Timeout = 30000;
    OperationResult response = Client.Put(indexUrl, jsonDocument);

    return new JsonNetSerializer().ToIndexResult(response.Result);
}

/// <summary>
/// Serializes <paramref name="document"/> to JSON and indexes it via the string overload.
/// </summary>
/// <param name="indexName">Name of the target index.</param>
/// <param name="id">Document id.</param>
/// <param name="document">Object to serialize and index.</param>
/// <returns>The parsed index response.</returns>
public IndexResult CreateIndex(string indexName, string id, object document)
{
    string json = new JsonNetSerializer().Serialize(document);
    return CreateIndex(indexName, id, json);
}
5、隨機生成測試數據,網上下載一篇txt的長篇小說,隨機截取字段,然後寫入Elasticsearch
測試數據如下:
// Static text pools from which the random student records are assembled.
public class TestData {
// Pool of surname characters; the generator picks a single character from it by index.
public static string xing = @"王李張劉陳楊趙黃周吳徐孫胡朱高林何郭馬羅梁宋鄭謝韓唐馮於董蕭程曹袁鄧許傅沈曾彭呂蘇盧蔣蔡賈丁魏薛葉閻余潘杜戴夏鍾汪田任姜范方石姚譚廖鄒熊金陸郝孔白崔康毛邱秦江史顧侯邵孟龍萬段雷錢湯尹黎易常武喬賀賴龔文龐樊蘭殷施陶洪翟安顏倪嚴牛溫蘆季俞章魯葛伍韋申尤畢聶叢焦向柳邢路岳齊沿梅莫庄辛管祝左塗谷祁時舒耿牟卜肖詹關苗凌費紀靳盛童歐甄項曲成游陽裴席衛查屈鮑位覃霍翁隋植甘景薄單包司柏寧柯阮桂閔歐陽解強柴華車冉房邊凈陰閆佘練駱付代麥容悲初瞿褚班全名井米談宮虞奚佟符蒲穆漆卞東儲黨從艾苻厲岑燕吉冷仇伊首郁婁楚鄺歷狄簡胥連帥封危支原滕苑信索栗官沙池藏師國鞏刁茅杭巫居竇皮戈麻饒習巴曠宗荊榮孝藺廉員西寇刃見底區酈卓琚續朴蒙敖花應喻冀尚頓菅嵇雒弓忻權諶卿扈海冼倫鹿宿山桑裘達么智宣尉遲東方幺郎農戚屠樓步鞠仲尉藍招攀欒籍壽鄔莢稅逄加勾由福緱欽鮮於但邸逢況鄢古樂斯鈕蓋旦毅邰哈鄂商英遲仝亓玄黑騰晏禹諸苟湛殳亢奉占聞粟種匡賓勞申屠伏過水真宇巢計羌相辜展丑銀豐矯上昝繩臧舍郅布糜烏衣來恆那滿門司徒皋旺公言藤釋堯繆干闞靖渠契晉六束良鶚貝邴沃竺揚勵歸上官荃焉多都果郜隆諸葛令狐慕禮祖翦力朗撖修呼富明站虢冶茹禚笪雲肇平弋候爾姬寶暢冒邾延禪浦敬頡南巍補";
// Pool of given-name characters; the generator takes two consecutive characters starting at an even offset.
public static string name = @"帆棟祜權錕坤允騫諦初盛炳初澤榮喆恆鶴禮華帝宇中鑫彬槐禧允翱鵬皓中偉炳皓槐帆芃欣鑫振傑誠錕濰吉軒福宇初柏芃翰浩峰延帆欣帆奇郁爍卓仕吉帝濰釗傑鑫星諦鑫銘鋒沛芃澤祿勇峰欣延鶴郁信俠翰邦寅軒澤哲佑福翱恆文楓澄棟翰中震杞斌凱錦升逸延騰諦權盛弘爍俊強博祿中欣權浩陽裕延盛平暢沛吉強駿起華炳騰柏佑暢傑凱鴻斌加振晨沛祥祜盛濡彬成弘天福錦穎嘉茜芸格美漫慧漫妍鈺琪玥沛玥鑫潔嵐采曼珍雪昕婷碧弦雪潔馨昕香弦帆芳菲楠俊月珊函蔚帆靈靈蓮優蔚碧文蕾婭林婧妮婷薇馨淑惠杉美梔怡薇琪曦雲漫瑤韻楠妮穎妮杉媛詩芳菲錦錦蕾芸歡珍嵐鶴莉優雲舒舒璇慧依菡雅妍楠雅慧靈陽漫珠帆媛可雅欣鑫妮雯霞柔芳芝琳彩冰林媛柔初倩玉冰薇潔妍潔璐采彩穎呈雪雲歡琪璟紫靜蓓薇歡薇柔晨萱雲歆鑫月陽婭媛露露琳";
// School names; one entry is chosen at random for the "school" field and another is appended to "desc".
public static string[] school = new string[] { "中山大學", "暨南大學", "汕頭大學", "華南理工大學", "華南農業大學", "廣東海洋大學", "廣州醫科大學", "廣州中醫藥大學", "華南師範大學", "韶關學院", "深圳大學", "廣東財經大學", "廣東工業大學", "東莞理工學院", "南方科技大學", "香港中文大學", "廣州商學院", "上海交通大學", "同濟大學", "復旦大學", "上海大學", "上海財經大學", "北京大學", "清華大學", "北京郵電大學", "中國人民大學", "北京理工大學" };
// Long prose passage; the generator cuts a short random substring out of it to build "desc".
public static string content = @"第一回 甄士隱夢幻識通靈 賈雨村風塵懷閨秀() 此開卷第一回也.作者自云:因曾歷過一番夢幻之後,故將真事隱去,而借」通靈」之說,撰此《石頭記》一書也.故曰」甄士隱」云云.但書所記何事何人?自又云:「今風塵碌碌,一事無成,忽念及當日所有之女子,一一細考較去,覺其行止見識,皆出於我之上.何我堂堂鬚眉,誠不若彼裙釵哉?實愧則有餘,悔又無益之大無可如何之日也!當此,則自欲將已往所賴天恩祖德,錦衣紈絝之時,飫甘饜肥之日,背父兄教育之恩,負師友規談之德,以至今日一技無成,半生潦倒之罪,編述一集,以告天下人:我之罪固不免,然閨閣本自歷歷有人,萬不可因我之不肖,自護己短,一併使其泯滅也.雖今日之茅椽蓬牖,瓦灶繩床,其晨夕風露,階柳庭花,亦未有妨我之襟懷筆墨者.雖我未學,下筆無,又何妨用假語村言,敷演出一段故事來,亦可使閨閣昭傳,復可悅世之目,破人愁悶,不亦宜乎?」故曰」賈雨村」云云. ...... ......
由來同一夢,休笑世人痴!"; }
隨機生成學生記錄的代碼如下:
// Bulk-generates random Student documents from the TestData pools and writes each one
// to the index "db_student_test1", using at most four parallel workers.
int xing_length = TestData.xing.Length;
int name_length = TestData.name.Length;
int school_length = TestData.school.Length;
int content_length = TestData.content.Length;

ParallelOptions _po = new ParallelOptions();
_po.MaxDegreeOfParallelism = 4;

Parallel.For(0, 100000000, _po, c =>
{
    // r is seeded with the iteration index, so its draws are reproducible per record.
    Random r = new Random(c);
    // NOTE(review): r2 uses the default seed; on runtimes where that seed is time-based,
    // instances created close together may produce identical sequences - confirm whether
    // that matters for this test data.
    Random r2 = new Random();
    try
    {
        // Take a 20-character snippet and strip embedded line breaks.
        // Fix: the original replaced the literal text "/r/n" (forward slashes), which never
        // matches a line break; "\r\n" is the intended escape sequence.
        string desc = TestData.content
            .Substring(r.Next(0, content_length - 700), 20)
            .Trim()
            .Replace("\r\n", string.Empty);

        Student model = new Student()
        {
            // One surname character followed by two given-name characters from an even offset.
            name = TestData.xing[r.Next(0, xing_length)].ToString()
                   + TestData.name.Substring(r.Next(0, name_length / 2) * 2, 2),
            school = TestData.school[r.Next(0, school_length)],
            chinese = r.Next(25, 80) + r2.Next(0, 20),
            math = r.Next(15, 60) + r2.Next(0, 40),
            english = r.Next(21, 70) + r2.Next(0, 30),
            @class = c,
            desc = desc + TestData.school[r2.Next(0, school_length)]
        };

        ElasticSearchHelper.Intance.CreateIndex("db_student_test1", Guid.NewGuid().ToString(), model);
    }
    catch (Exception ex)
    {
        // Best effort: report the failure and continue with the next record.
        Console.Write(ex.ToString());
    }
});
6、單個詞語查詢,例如查詢滿足以下條件的文檔:(1) desc 包含 「黛玉」 (2)chinese、math、english 都大於90(3)分頁取前10條記錄(4)關鍵詞高亮(5)排序按照語文、數學、英語倒序
controller 代碼如下
/// <summary>
/// Runs the single-term search for <paramref name="key"/>, takes the first 10 hits and
/// renders them together with the elapsed time.
/// </summary>
/// <param name="key">Search keyword; defaults to "黛玉".</param>
public ActionResult Index(string key = "黛玉")
{
    Stopwatch sw = Stopwatch.StartNew();

    // key?.Trim() avoids a NullReferenceException when key arrives as null;
    // Term itself returns null for a null or empty key.
    var model = ElasticSearchHelper.Intance.Term(key?.Trim(), 0, 10);

    sw.Stop();
    ViewBag.Message = $"共耗時{sw.ElapsedMilliseconds}毫秒";
    return View(model);
}
ElasticsearchHelper 代碼如下
/// <summary>
/// Searches the alias "student_test1" for documents whose "desc" field contains the exact
/// term <paramref name="key"/> and whose chinese, math and english scores pass the range
/// filters below. Results are sorted by chinese, math, english (all descending) and the
/// matched term in "desc" is highlighted.
/// </summary>
/// <param name="key">Term to look for; surrounding whitespace is trimmed.</param>
/// <param name="from">Offset of the first hit to return.</param>
/// <param name="size">Maximum number of hits to return.</param>
/// <returns>The deserialized search response, or null when <paramref name="key"/> is null or empty.</returns>
public ElasticsearchResult<Student> Term(string key, int from = 0, int size = 10)
{
    if (string.IsNullOrEmpty(key))
    {
        return null;
    }

    string keyword = key.Trim();
    string searchUrl = new SearchCommand("student_test1", "_doc");

    // NOTE(review): chinese uses From/To while math and english use Gt; confirm whether
    // the lower bound on chinese is meant to be exclusive as well.
    string requestBody = new QueryBuilder<Student>()
        .Query(q => q.Bool(b => b.Must(must => must
            .Term(term => term.Field("desc").Value(keyword))
            .Range(range => range.Field("chinese").From("90").To("100"))
            .Range(range => range.Field("math").Gt("90"))
            .Range(range => range.Field("english").Gt("90")))))
        .From(from)
        .Size(size)
        .Sort(sort => sort
            .Field("chinese", SortDirection.desc)
            .Field("math", SortDirection.desc)
            .Field("english", SortDirection.desc))
        .Highlight(hl => hl
            .PreTags("<span class=\"label label-sm label-danger\">")
            .PostTags("</span>")
            .Fields(field => field.FieldName("desc").Order(HighlightOrder.score)))
        .Build();

    string responseJson = Client.Post(searchUrl, requestBody);
    return new JsonNetSerializer().Deserialize<ElasticsearchResult<Student>>(responseJson);
}
效果是
7、語句匹配查詢,關鍵詞是「黛玉」,當然是可以查出來,但是如果用戶知道紅樓夢有個情節,是關於大觀園裡眾人舉辦螃蟹宴,作詩玩樂的,想把相關文段查詢出來,這時用戶輸入的關鍵詞是 「寶玉黛玉螃蟹宴作詩」 ,這時候會查詢出什麼呢?
首先改一下邏輯,controller 代碼為
/// <summary>
/// Runs the query-string search for <paramref name="key"/>, takes the first 10 hits and
/// renders them with the shared ES index view together with the elapsed time.
/// </summary>
/// <param name="key">Search phrase; defaults to "寶玉黛玉螃蟹宴作詩".</param>
public ActionResult Query(string key = "寶玉黛玉螃蟹宴作詩")
{
    Stopwatch sw = Stopwatch.StartNew();

    // key?.Trim() avoids a NullReferenceException when key arrives as null;
    // the helper's Query method returns null for a null or empty key.
    var model = ElasticSearchHelper.Intance.Query(key?.Trim(), 0, 10);

    sw.Stop();
    ViewBag.Message = $"共耗時{sw.ElapsedMilliseconds}毫秒";
    return View("~/Views/ES/Index.cshtml", model);
}
ElasticsearchHelper 代碼如下
/// <summary>
/// Searches the alias "student_test1" with a query-string query on "desc" built from
/// <paramref name="key"/>, combined with the score range filters below. No explicit sort
/// is applied, and the matched text in "desc" is highlighted.
/// </summary>
/// <param name="key">Search phrase; surrounding whitespace is trimmed.</param>
/// <param name="from">Offset of the first hit to return.</param>
/// <param name="size">Maximum number of hits to return.</param>
/// <returns>The deserialized search response, or null when <paramref name="key"/> is null or empty.</returns>
public ElasticsearchResult<Student> Query(string key, int from = 0, int size = 10)
{
    if (string.IsNullOrEmpty(key))
    {
        return null;
    }

    string phrase = key.Trim();
    string searchUrl = new SearchCommand("student_test1", "_doc");

    string requestBody = new QueryBuilder<Student>()
        .Query(q => q.Bool(b => b.Must(must => must
            // A match query could be used here as well; worth trying.
            .QueryString(qs => qs.DefaultField("desc").Query(phrase))
            .Range(range => range.Field("chinese").From("90").To("100"))
            .Range(range => range.Field("math").Gt("90"))
            .Range(range => range.Field("english").Gt("90")))))
        .From(from)
        .Size(size)
        // Intentionally no sort on the score fields here: Elasticsearch then orders hits by
        // how well they match the keywords, so the best matches should come first.
        // The sort used by the term search was:
        // .Sort(s => s.Field("chinese", SortDirection.desc).Field("math", SortDirection.desc).Field("english", SortDirection.desc))
        .Highlight(hl => hl
            .PreTags("<span class=\"label label-sm label-danger\">")
            .PostTags("</span>")
            .Fields(field => field.FieldName("desc").Order(HighlightOrder.score)))
        .Build();

    string responseJson = Client.Post(searchUrl, requestBody);
    return new JsonNetSerializer().Deserialize<ElasticsearchResult<Student>>(responseJson);
}
查詢結果是
可以看到,ES 會把 「寶玉黛玉螃蟹宴作詩」 進行分詞,然後進行文本匹配。
PlainElastic.Net 是比較舊的版本了,不是很適合 ES 版本 7.8.0,可以改用 NEST 或者 Elasticsearch.net。我這裡是參考一些園友的做法,然後寫個 Demo 出來測試一下。
(未完,ES 聚合統計的,待續)