關於java截取多重域名的筆記… 發佈於 2 年前 (2018年01月28日) – 2116 次檢閱

最近一直在做域名篩選。之前一直在學 Java Web,但是來到這個公司後,一直是讓我去截取數據。

所以自己也做下筆記吧..

先上圖再說需求吧

需求:

如:把 www.baidu.com 或者 news.baidu.com 截取後,分別拆成:

即,先去除www和後綴com/net/cn/com.cn….

然後輸出結果為:.baidu.(根域名)、news.(二級)、news.baidu.(二級全域名)

實現步驟…

1.先把數據,即url除去www/後綴

2.使用java代碼去跑,分析判斷,如果遇到4級、5級域名,就重複跑(即把含有幾級的數據,跑到只剩下根域名)

下面是主要代碼吧。有點亂,做的過程我自己也很懵逼…

package com.glj.quchong;     import java.sql.Connection;  import java.sql.ResultSet;  import java.sql.SQLException;  import java.sql.Statement;  import java.text.SimpleDateFormat;  import java.util.Date;     import comglj.utils.CharUtil;  import comglj.utils.JDBCUtil;        public class TestJDBC {  	public static void main(String[] args) {  		try{  			Class.forName("com.mysql.jdbc.Driver");    			Connection conn = JDBCUtil.getConnection();  			Statement s = conn.createStatement();    			String sql = "select * FROM sanji";  			ResultSet rs = s.executeQuery(sql);  			while (rs.next()) {  				String id = rs.getString(1);  				String tac = rs.getString(2);  				String host = rs.getString(3);  				String is_reg = rs.getString(4);  				String host_id = rs.getString(5);  				String pass_time = "";  				SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");//設置日期格式  				pass_time = df.format(new Date());  				//判斷第一個.不在開頭and最後一個.不在域名最後面  				if(host.indexOf(".")>0&&host.indexOf(".")+1<host.length()){  					//一級域名  //					System.out.println(host+"--"+host.substring(0,host.indexOf(".")+1));  					String hostByyiji = host.substring(0,host.indexOf(".")+1);  					String sqlByyiji = "insert into yiji (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) "  							+ "values('"+id+"','"+tac+"','"+hostByyiji+"','"+is_reg+"','"+host_id+"','"+pass_time+"')";  					Statement s1 = conn.createStatement();  					s1.execute(sqlByyiji);    					//包含一級二級域名,再次去篩選截取  					String hostByerji = host.substring(host.indexOf("."),host.length());  					if(CharUtil.CharNumber(hostByerji)>2){  						//二級及其以上  //						System.out.println(hostByerji);  						hostByerji = host.substring(host.indexOf(".")+1,host.length());  //						System.out.println(host+"----"+host.substring(host.indexOf(".")+1,host.length()));  						String sqlByerji = "insert into siji (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) "  								+ "values('"+id+"','"+tac+"','"+hostByerji+"','"+is_reg+"','"+host_id+"','"+pass_time+"')";  						
Statement sterji = conn.createStatement();  						sterji.execute(sqlByerji);  					}else{  						//根域名  //						System.out.println(hostByerji);  						String sqlBygeng = "insert into geng (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) "  								+ "values('"+id+"','"+tac+"','"+hostByerji+"','"+is_reg+"','"+host_id+"','"+pass_time+"')";  						Statement st1 = conn.createStatement();  						st1.execute(sqlBygeng);  					}  				}else{  					//根域名  //					System.out.println(host);  					String sqlBygeng = "insert into geng (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) "  							+ "values('"+id+"','"+tac+"','"+host+"','"+is_reg+"','"+host_id+"','"+pass_time+"')";  					Statement st1 = conn.createStatement();  					st1.execute(sqlBygeng);  				}  			}  		}catch(ClassNotFoundException | SQLException e){  			e.printStackTrace();  		}  	}  }	

下面代碼是判斷域名裏面還有幾個『.』即可判斷還有幾級沒有拆分

package comglj.utils;     public class CharUtil {  	public static int CharNumber(String url){  		//需要查找重複的字或者符號  		char c = '.';  		int i,index=-1,count=0;  		for(i=0;i<url.length();i++)  		if((url.indexOf(c,index+1))!=-1){  			index=url.indexOf(c,index+1);  			count++;  		}  		return count;  	}  }

數據庫類

package comglj.utils;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

/**
 * Opens JDBC connections to the local MySQL database {@code work}.
 *
 * <p>NOTE(review): URL, user name and password are hard-coded; consider
 * moving them to external configuration.
 */
public class JDBCUtil {
	private static final String DRIVER_CLASS = "com.mysql.jdbc.Driver";

	// Plain constants, so they are always set even when loading the driver
	// class fails (they used to stay null in that case).
	private static final String url = "jdbc:mysql://127.0.0.1:3306/work?characterEncoding=UTF-8";
	private static final String username = "root";
	private static final String password = "root";

	public JDBCUtil() {
		super();
	}

	static {
		try {
			Class.forName(DRIVER_CLASS);
		} catch (ClassNotFoundException e) {
			// Kept non-fatal as before, but no longer silent: DriverManager may
			// still find a driver that registered itself from the class path.
			System.err.println("JDBC driver class " + DRIVER_CLASS + " could not be loaded: " + e);
		}
	}

	/**
	 * Opens a new connection using the settings above.
	 *
	 * @return a new connection; the caller is responsible for closing it
	 * @throws SQLException if the connection cannot be established
	 */
	public static Connection getConnection() throws SQLException {
		return DriverManager.getConnection(url, username, password);
	}

	/**
	 * Smoke test: prints the {@code NAME} column of every row in table {@code k2}.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String sql = "select * from k2";
		// The SQL is already bound by prepareStatement, so the no-argument
		// executeQuery() is used; the String overload is not permitted on a
		// PreparedStatement.
		try (Connection conn = JDBCUtil.getConnection();
				PreparedStatement ps = conn.prepareStatement(sql);
				ResultSet rs = ps.executeQuery()) {
			while (rs.next()) {
				String name = rs.getString("NAME");
				System.out.println(name);
			}
		} catch (SQLException e) {
			e.printStackTrace();
		}
	}
}

最後再用 SQL 語句統計拆分結果出現的次數

-- Count how often each (MFR_NAM_ID, TAC, RULE) combination occurs in x5,
-- listing the most frequent combinations first.
SELECT
    MFR_NAM_ID AS ID,
    TAC,
    RULE       AS 域名,
    COUNT(1)   AS 次數
FROM x5
GROUP BY
    MFR_NAM_ID,
    TAC,
    RULE
ORDER BY
    COUNT(1) DESC