关于java截取多重域名的笔记… 發布於 2 年前 (2018年01月28日) – 2116 次检阅

最近一直在做域名筛选,因为之前一直在学javaweb,但是来到这个公司,一直是让我去截取数据。

所以自己也做下笔记吧..

先上图再说需求吧

需求:

如:把www.baidu.com 或者news.baidu.com,截取分布拆成

即,先去除www和后缀com/net/cn/com.cn….

然后输入结果为:.baidu.(根域名)news.(二级)news.baidu.(二级全域名)

实现步骤…

1.先把数据,即url除去www/后缀

2.使用java代码去跑,分析判断,如果遇到4级、5级域名,就重复跑(即把含有几级的数据,跑到只剩下根域名)

下面是主要代码吧。有点乱,做的过程我自己也很懵逼…

package com.glj.quchong;     import java.sql.Connection;  import java.sql.ResultSet;  import java.sql.SQLException;  import java.sql.Statement;  import java.text.SimpleDateFormat;  import java.util.Date;     import comglj.utils.CharUtil;  import comglj.utils.JDBCUtil;        public class TestJDBC {  	public static void main(String[] args) {  		try{  			Class.forName("com.mysql.jdbc.Driver");    			Connection conn = JDBCUtil.getConnection();  			Statement s = conn.createStatement();    			String sql = "select * FROM sanji";  			ResultSet rs = s.executeQuery(sql);  			while (rs.next()) {  				String id = rs.getString(1);  				String tac = rs.getString(2);  				String host = rs.getString(3);  				String is_reg = rs.getString(4);  				String host_id = rs.getString(5);  				String pass_time = "";  				SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");//设置日期格式  				pass_time = df.format(new Date());  				//判断第一个.不在开头and最后一个.不在域名最后面  				if(host.indexOf(".")>0&&host.indexOf(".")+1<host.length()){  					//一级域名  //					System.out.println(host+"--"+host.substring(0,host.indexOf(".")+1));  					String hostByyiji = host.substring(0,host.indexOf(".")+1);  					String sqlByyiji = "insert into yiji (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) "  							+ "values('"+id+"','"+tac+"','"+hostByyiji+"','"+is_reg+"','"+host_id+"','"+pass_time+"')";  					Statement s1 = conn.createStatement();  					s1.execute(sqlByyiji);    					//包含一级二级域名,再次去筛选截取  					String hostByerji = host.substring(host.indexOf("."),host.length());  					if(CharUtil.CharNumber(hostByerji)>2){  						//二级及其以上  //						System.out.println(hostByerji);  						hostByerji = host.substring(host.indexOf(".")+1,host.length());  //						System.out.println(host+"----"+host.substring(host.indexOf(".")+1,host.length()));  						String sqlByerji = "insert into siji (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) "  								+ "values('"+id+"','"+tac+"','"+hostByerji+"','"+is_reg+"','"+host_id+"','"+pass_time+"')";  						Statement sterji = conn.createStatement();  						sterji.execute(sqlByerji);  					}else{  						//根域名  //						System.out.println(hostByerji);  						String sqlBygeng = "insert into geng (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) "  								+ "values('"+id+"','"+tac+"','"+hostByerji+"','"+is_reg+"','"+host_id+"','"+pass_time+"')";  						Statement st1 = conn.createStatement();  						st1.execute(sqlBygeng);  					}  				}else{  					//根域名  //					System.out.println(host);  					String sqlBygeng = "insert into geng (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) "  							+ "values('"+id+"','"+tac+"','"+host+"','"+is_reg+"','"+host_id+"','"+pass_time+"')";  					Statement st1 = conn.createStatement();  					st1.execute(sqlBygeng);  				}  			}  		}catch(ClassNotFoundException | SQLException e){  			e.printStackTrace();  		}  	}  }	

下面代码是判断域名里面还有几个‘.’即可判断还有几级没有拆分

package comglj.utils;     public class CharUtil {  	public static int CharNumber(String url){  		//需要查找重复的字或者符号  		char c = '.';  		int i,index=-1,count=0;  		for(i=0;i<url.length();i++)  		if((url.indexOf(c,index+1))!=-1){  			index=url.indexOf(c,index+1);  			count++;  		}  		return count;  	}  }

数据库类

package comglj.utils;     import java.sql.Connection;  import java.sql.DriverManager;  import java.sql.PreparedStatement;  import java.sql.ResultSet;  import java.sql.SQLException;     public class JDBCUtil {  	private static String url;  	private static String username;  	private static String password;  	public JDBCUtil() {  		super();  		// TODO Auto-generated constructor stub  	}  	static {  		try {  			Class.forName("com.mysql.jdbc.Driver");  			url = "jdbc:mysql://127.0.0.1:3306/work?characterEncoding=UTF-8";  			username = "root";  			password = "root";  		} catch (Exception e) {  			// TODO: handle exception  		}  	}    	public static Connection getConnection() throws SQLException{  		Connection con = DriverManager.getConnection(url,username,password);  		return con;  	}  	public static void main(String[] args) {  		try {  			Connection conn = JDBCUtil.getConnection();  			String sql = "select * from k2";  			PreparedStatement ps = conn.prepareStatement(sql);  			ResultSet rs =  ps.executeQuery(sql);  			while(rs.next()){  				String name = rs.getString("NAME");  				System.out.println(name);  			}  		} catch (SQLException e) {  			// TODO Auto-generated catch block  			e.printStackTrace();  		}    	}  }

最后在用sql语句 统计拆分出现的次数

SELECT MFR_NAM_ID as ID,TAC,RULE as 域名,count(1) as 次数 from x5 GROUP BY MFR_NAM_ID,TAC,RULE ORDER BY count(1) DESC