词频统计小程序-WordCount.exe

  • 2019 年 12 月 6 日
  • 笔记

最近顶哥为了完成学历提升学业中的小作业,做了一个词频统计的.exe小程序。因为当时做的时候网上的相关资料比较少,因此顶哥决定把自己拙劣的作品发出来,给需要的人提供一种思路,希望各位看官不要dis才好。最后附上源码链接,感兴趣的朋友可以继续优化哦。

01

先看效果

双击运行,下拉框选择源文件来源,支持本地和网络资源,如图:

本地源文件

网络源文件

02

主要代码

1.pom文件

  <!--
    POM fragment: declares the libraries the word-count tool uses and configures
    the build to skip tests and to produce a single runnable jar that bundles
    all dependencies.
  -->
  <dependencies>
      <!-- Word segmenter (IK Analyzer) -->
      <dependency>
          <groupId>com.janeluo</groupId>
          <artifactId>ikanalyzer</artifactId>
          <version>2012_u6</version>
      </dependency>
      <!-- Unit testing (test scope only) -->
      <dependency>
          <groupId>junit</groupId>
          <artifactId>junit</artifactId>
          <version>4.12</version>
          <scope>test</scope>
      </dependency>
      <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
      <dependency>
          <groupId>org.jsoup</groupId>
          <artifactId>jsoup</artifactId>
          <version>1.11.3</version>
      </dependency>
  </dependencies>
  <build>
      <plugins>
          <!-- Surefire is configured with skipTests=true, so tests are not run during the build -->
          <plugin>
              <groupId>org.apache.maven.plugins</groupId>
              <artifactId>maven-surefire-plugin</artifactId>
              <version>2.18.1</version>
              <configuration>
                  <skipTests>true</skipTests>
              </configuration>
          </plugin>
          <!-- Packaging plugin -->
          <plugin>
              <groupId>org.apache.maven.plugins</groupId>
              <artifactId>maven-assembly-plugin</artifactId>
              <version>2.4.1</version>
              <configuration>
                  <!-- Bundle all project dependencies into the jar -->
                  <descriptorRefs>
                      <descriptorRef>jar-with-dependencies</descriptorRef>
                  </descriptorRefs>
                  <!-- Set the main class in the manifest so the jar is executable -->
                  <archive>
                      <manifest>
                          <addClasspath>true</addClasspath>
                          <mainClass>cn.dintalk.service.WordCount</mainClass>
                      </manifest>
                  </archive>
              </configuration>
              <executions>
                  <execution>
                      <id>make-assembly</id>
                      <!-- Bind the assembly goal to the package phase -->
                      <phase>package</phase>
                      <goals>
                          <goal>single</goal>
                      </goals>
                  </execution>
              </executions>
          </plugin>
      </plugins>
  </build>

2.WebUtils

/**   * @author Mr.song   * @date 2019/10/13 9:26   */  public class WebUtils {        /**       * 根据url和参数发送get请求       *       * @param url       * @param param       * @return 返回网页内容       */      public static String sendGet(String url, String param) {          String result = "";          if (param != null) {              url = url + "?" + param;          }          try {              URL realUrl = new URL(url);              // 打开和URL之间的连接              HttpURLConnection conn = getHttpURLConnection(realUrl);              result = getResponse(conn);          } catch (Exception e) {              e.printStackTrace();          }          return result;      }          //根据url 获取连接      private static HttpURLConnection getHttpURLConnection(URL realUrl) {          StringBuilder sb = new StringBuilder();          sb.append("Mozilla/5.0 (Windows NT 10.0; Win64; x64)");          sb.append(" AppleWrbKit/537.36(KHTML, like Gecko)");          sb.append(" Chrome/72.0.3626.119 Safari/537.36");          HttpURLConnection conn = null;          try {              // 打开和URL之间的连接              conn = (HttpURLConnection) realUrl.openConnection();              // 设置通用的请求属性              conn.setRequestProperty("accept", "*/*");              conn.setRequestProperty("connection", "Keep-Alive");              conn.setRequestProperty("user-agent", sb.toString());          } catch (IOException e) {              e.printStackTrace();          }          return conn;      }        // 根据url连接获取响应      private static String getResponse(HttpURLConnection conn) {          // 读取URL的响应          String result = "";          try (InputStream is = conn.getInputStream();               InputStreamReader isr = new InputStreamReader(is, "utf-8");               BufferedReader in = new BufferedReader(isr)) {              String line;              while ((line = in.readLine()) != null) {                  result += "n" + line;              }          } catch (Exception e) {              
System.out.println("Err:getResponse()");              e.printStackTrace();          } finally {              conn.disconnect();          }  //        System.out.println("getResponse():" + result.length());          return result;      }        /**       * 解析网页为文本       *       * @param html       * @return       */      public static String parseHtmlToText(String html) {          Document document = Jsoup.parse(html);          return document.text();      }  }

3.IKSUtils

/**   * @author Mr.song   * @date 2019/10/10 21:12   */  public class IKSUtils {        /**       * 对文本进行分词       * @param text       * @return       * @throws Exception       */      public static List<String> getStringList(String text) throws Exception{          //独立Lucene实现          StringReader re = new StringReader(text);          IKSegmenter ik = new IKSegmenter(re, true);          Lexeme lex;          List<String> s = new ArrayList<>();          while ((lex = ik.next()) != null) {              s.add(lex.getLexemeText());          }          return s;      }        /**       * 统计词频       * @param wordList       * @return       */      public static Map<String,Integer> wordCount(List<String> wordList){          if (wordList == null) return null;          Map<String,Integer> result = new HashMap<>();          for (String s : wordList) {              Integer count = result.get(s);              if (count ==  null){                  result.put(s,1);              }else {                  result.put(s,++count);              }          }          //按照次数排序          result = result                  .entrySet()                  .stream()                  .sorted(Collections.reverseOrder(Map.Entry.comparingByValue()))                  .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e2,                                  LinkedHashMap::new));          return result;      }  }

03

相关链接

源码地址:https://github.com/MrSonghui/wordCount

打包.exe :https://www.cnblogs.com/xiaoMzjm/p/3879766.html