關於java截取多重域名的筆記… 發佈於 2 年前 (2018年01月28日) – 2116 次檢閱
- 2020 年 4 月 9 日
- 筆記
最近一直在做域名篩選,因為之前一直在學javaweb,但是來到這個公司,一直是讓我去截取數據。
所以自己也做下筆記吧..
先上圖再說需求吧

需求:
如:把 www.baidu.com 或者 news.baidu.com 分別截取,拆成各級域名
即,先去除www和後綴com/net/cn/com.cn….
然後輸出結果為:.baidu.(根域名)、news.(二級)、news.baidu.(二級全域名)
實現步驟…
1.先把數據,即url除去www/後綴
2.使用java代碼去跑,分析判斷,如果遇到4級、5級域名,就重複跑(即把含有幾級的數據,跑到只剩下根域名)
下面是主要代碼吧。有點亂,做的過程我自己也很懵逼…
package com.glj.quchong; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.text.SimpleDateFormat; import java.util.Date; import comglj.utils.CharUtil; import comglj.utils.JDBCUtil; public class TestJDBC { public static void main(String[] args) { try{ Class.forName("com.mysql.jdbc.Driver"); Connection conn = JDBCUtil.getConnection(); Statement s = conn.createStatement(); String sql = "select * FROM sanji"; ResultSet rs = s.executeQuery(sql); while (rs.next()) { String id = rs.getString(1); String tac = rs.getString(2); String host = rs.getString(3); String is_reg = rs.getString(4); String host_id = rs.getString(5); String pass_time = ""; SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");//設置日期格式 pass_time = df.format(new Date()); //判斷第一個.不在開頭and最後一個.不在域名最後面 if(host.indexOf(".")>0&&host.indexOf(".")+1<host.length()){ //一級域名 // System.out.println(host+"--"+host.substring(0,host.indexOf(".")+1)); String hostByyiji = host.substring(0,host.indexOf(".")+1); String sqlByyiji = "insert into yiji (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) " + "values('"+id+"','"+tac+"','"+hostByyiji+"','"+is_reg+"','"+host_id+"','"+pass_time+"')"; Statement s1 = conn.createStatement(); s1.execute(sqlByyiji); //包含一級二級域名,再次去篩選截取 String hostByerji = host.substring(host.indexOf("."),host.length()); if(CharUtil.CharNumber(hostByerji)>2){ //二級及其以上 // System.out.println(hostByerji); hostByerji = host.substring(host.indexOf(".")+1,host.length()); // System.out.println(host+"----"+host.substring(host.indexOf(".")+1,host.length())); String sqlByerji = "insert into siji (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) " + "values('"+id+"','"+tac+"','"+hostByerji+"','"+is_reg+"','"+host_id+"','"+pass_time+"')"; Statement sterji = conn.createStatement(); sterji.execute(sqlByerji); }else{ //根域名 // System.out.println(hostByerji); String sqlBygeng = "insert into geng (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) " + 
"values('"+id+"','"+tac+"','"+hostByerji+"','"+is_reg+"','"+host_id+"','"+pass_time+"')"; Statement st1 = conn.createStatement(); st1.execute(sqlBygeng); } }else{ //根域名 // System.out.println(host); String sqlBygeng = "insert into geng (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) " + "values('"+id+"','"+tac+"','"+host+"','"+is_reg+"','"+host_id+"','"+pass_time+"')"; Statement st1 = conn.createStatement(); st1.execute(sqlBygeng); } } }catch(ClassNotFoundException | SQLException e){ e.printStackTrace(); } } }
下面的代碼用來統計域名裏面還有幾個『.』,據此即可判斷還有幾級沒有拆分
package comglj.utils; public class CharUtil { public static int CharNumber(String url){ //需要查找重複的字或者符號 char c = '.'; int i,index=-1,count=0; for(i=0;i<url.length();i++) if((url.indexOf(c,index+1))!=-1){ index=url.indexOf(c,index+1); count++; } return count; } }
數據庫類
package comglj.utils; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; public class JDBCUtil { private static String url; private static String username; private static String password; public JDBCUtil() { super(); // TODO Auto-generated constructor stub } static { try { Class.forName("com.mysql.jdbc.Driver"); url = "jdbc:mysql://127.0.0.1:3306/work?characterEncoding=UTF-8"; username = "root"; password = "root"; } catch (Exception e) { // TODO: handle exception } } public static Connection getConnection() throws SQLException{ Connection con = DriverManager.getConnection(url,username,password); return con; } public static void main(String[] args) { try { Connection conn = JDBCUtil.getConnection(); String sql = "select * from k2"; PreparedStatement ps = conn.prepareStatement(sql); ResultSet rs = ps.executeQuery(sql); while(rs.next()){ String name = rs.getString("NAME"); System.out.println(name); } } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
最後再用 SQL 語句統計拆分結果出現的次數
-- Count how often each (MFR_NAM_ID, TAC, RULE) combination occurs in x5,
-- listing the most frequent combinations first.
SELECT
    MFR_NAM_ID AS ID,
    TAC,
    RULE       AS 域名,
    COUNT(1)   AS 次數
FROM x5
GROUP BY MFR_NAM_ID, TAC, RULE
ORDER BY COUNT(1) DESC