httpclient 多執行緒爬蟲實例

  • 2019 年 10 月 4 日
  • 筆記

本人最近在研究安全測試的過程中,偶然發現某站有一個漏洞:獲取資源的時候竟然不需要校驗,原本設定的用戶每天獲取資源的次數限制也就失效了。於是趕緊想到用爬蟲多爬一些數據,但奈何數據量太大,所以改用多執行緒來爬取。經過嘗試終於完成了,腳本寫得比較粗糙,因為沒打算真的爬完。預計 10 萬筆數據,開 10 個執行緒,每個執行緒爬 1 萬筆,每次請求爬 100 筆(介面竟然是 GET,有 URL 長度限制)。

分享程式碼,供大家參考。

package practise;

import java.util.Date;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.http.client.methods.HttpGet;
import net.sf.json.JSONObject;
import source.ApiLibrary;

/**
 * Multi-threaded crawler example: splits a contiguous range of document IDs across a fixed
 * thread pool and fetches them in batches through a GET endpoint.
 *
 * <p>HTTP, logging and output helpers ({@code getHttpGet}, {@code getHttpResponseEntityByJson},
 * {@code logLog}, {@code output}, {@code outputTimeDiffer}, {@code testOver}) are not defined in
 * this class; they are expected to come from {@link ApiLibrary}.
 */
public class LoginDz extends ApiLibrary {

    /** Number of worker threads, and therefore the number of ID ranges. */
    private static final int THREAD_COUNT = 10;

    /** How many consecutive IDs each worker is responsible for. */
    private static final int IDS_PER_THREAD = 10000;

    /** IDs sent per request; kept small because the IDs travel in the GET query string. */
    private static final int BATCH_SIZE = 100;

    /** Serialized form of a response that carries no results; such responses are not logged. */
    private static final String EMPTY_RESPONSE = "{\"success_response\":[]}";

    public static void main(String[] args) {
        LoginDz loginDz = new LoginDz();
        loginDz.excuteTreads();
        testOver();
    }

    /**
     * Requests one batch of documents and logs the response when it is not the empty result.
     *
     * @param code document IDs to request in this call
     * @param name label passed to {@code logLog} for non-empty responses
     * @return the parsed JSON response
     */
    public JSONObject getTi(int[] code, String name) {
        String url = "***********";
        JSONObject args = new JSONObject();
        args.put("ID_List", getTiId(code));
        HttpGet httpGet = getHttpGet(url, args);
        JSONObject response = getHttpResponseEntityByJson(httpGet);
        String text = response.toString();
        if (!EMPTY_RESPONSE.equals(text)) {
            // NOTE(review): the original passed the literal "name" here, leaving the `name`
            // parameter unused; using the parameter looks like the intent - confirm against
            // logLog's contract.
            logLog(name, text);
        }
        output(response);
        return response;
    }

    /**
     * Builds the ID filter fragment, one {@code filter[where][origDocID][inq]=<id>&} entry per ID.
     *
     * @param id document IDs to include
     * @return the concatenated entries (each followed by {@code &}), or an empty string for no IDs
     */
    public String getTiId(int... id) {
        // Purely local buffer, so the unsynchronized StringBuilder is sufficient.
        StringBuilder result = new StringBuilder();
        for (int value : id) {
            result.append("filter[where][origDocID][inq]=").append(value).append("&");
        }
        return result.toString();
    }

    /**
     * Runs the crawl: starts {@code THREAD_COUNT} workers, waits for all of them to finish and
     * reports the elapsed time.
     */
    public void excuteTreads() {
        ExecutorService executorService = Executors.newFixedThreadPool(THREAD_COUNT);
        CountDownLatch countDownLatch = new CountDownLatch(THREAD_COUNT);
        Date start = new Date();
        try {
            for (int i = 0; i < THREAD_COUNT; i++) {
                executorService.execute(new More(countDownLatch, i));
            }
            countDownLatch.await();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } finally {
            // Always release the pool, including when the wait was interrupted.
            executorService.shutdown();
        }
        Date end = new Date();
        outputTimeDiffer(start, end);
    }

    /**
     * Worker that crawls the ID range
     * {@code [num * IDS_PER_THREAD, (num + 1) * IDS_PER_THREAD)} in batches of {@code BATCH_SIZE}.
     */
    class More implements Runnable {
        public CountDownLatch countDownLatch;
        public int num;

        public More(CountDownLatch countDownLatch, int num) {
            this.countDownLatch = countDownLatch;
            this.num = num;
        }

        @Override
        public void run() {
            int bound = num * IDS_PER_THREAD;
            try {
                for (int i = bound; i < bound + IDS_PER_THREAD; i += BATCH_SIZE) {
                    int[] ids = new int[BATCH_SIZE];
                    for (int k = 0; k < BATCH_SIZE; k++) {
                        // Index by the in-batch offset k; i is the absolute ID and would
                        // overrun the array.
                        ids[k] = i + k;
                    }
                    // One request per completed batch, not one per ID.
                    getTi(ids, bound + "");
                }
            } finally {
                // Count down even on failure so excuteTreads() is never left waiting.
                countDownLatch.countDown();
            }
        }
    }
}