Java爬蟲(一)利用GET和POST發送請求,獲取服務器返回信息


本人所使用軟件

  • Eclipse
  • Fiddler
  • UC瀏覽器

分析請求信息

以知乎(https://www.zhihu.com)為例,模擬登錄請求,獲取登錄後的首頁。首先要做的就是分析請求信息。

在UC瀏覽器中按F12打開開發者工具,點擊Network,再按F5刷新。使用自己的賬號登錄知乎後,點擊www.zhihu.com這條請求,出現以下界面:

這里寫圖片描述
在General中,可以看到請求方式是GET,因此在Fiddler的請求構造中,方法選定GET。
下拉後,看到Request Header,將里面所有的內容復制下來,粘貼到Fiddler的請求構造里
這里寫圖片描述

這里寫圖片描述

點擊Execute,在Fiddler中點擊剛才訪問的網址,點擊嗅探,再點擊下方的TextView,發現並沒有顯示內容。

這里寫圖片描述

刪除部分無用的Request Header,點擊Execute,發現返回數據成功!

這里寫圖片描述
這里寫圖片描述

發送請求信息,獲取數據

從以上的分析可以知道,訪問知乎需要的Request Header,只需要有Cookie就足夠了。因此,我們將請求構造里已經格式化好的內容復制到txt文件內,每行一個「名稱: 值」形式的請求頭。

文件名為(requestheader.txt)

接下來就是使用Java發送請求信息了。發送請求信息很簡單,就直接貼出代碼共同討論。

package Main;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.util.HashMap;
import java.util.Map;

import Utils.HttpUtils;
import Utils.HttpUtils.OnVisitingListener;
import Utils.StreamUtils;
import Utils.StreamUtils.OnGetStringListener;

/**
 * Demo entry point: sends a GET request to zhihu.com using the request
 * headers stored in {@code requestheader.txt} and prints the response body.
 */
public class Main {

    public static void main(String[] args) {
        // Fetch the page; headers are configured and the response is printed
        // inside the listener callbacks.
        getWebData();
    }

    /**
     * Issues the GET request. Request headers are loaded from
     * {@code requestheader.txt} (one {@code Name: value} pair per line) just
     * before the connection is opened.
     */
    private static void getWebData() {
        HttpUtils.newInstance().setOnVisitingListener(new OnVisitingListener() {

            @Override
            public void onSuccess(HttpURLConnection conn) {
                // try-with-resources so the response stream is released even
                // if reading fails half way through.
                try (InputStream inputStream = conn.getInputStream()) {
                    System.out.println(StreamUtils.getString(inputStream));
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }

            @Override
            public void onSetDetails(HttpURLConnection conn, HttpUtils httpUtils) {
                final Map<String, String> headers = new HashMap<String, String>();
                StreamUtils.getString("requestheader.txt", new OnGetStringListener() {

                    @Override
                    public void onGeted() {
                        // Nothing to do once the whole file has been read.
                    }

                    @Override
                    public void onGetString(String line) {
                        System.out.println(line);
                        // Split on the FIRST colon only: header values (cookies,
                        // URLs, timestamps) may themselves contain ':'.
                        int colon = line.indexOf(':');
                        if (colon <= 0) {
                            // Blank or malformed line: no header name to use.
                            return;
                        }
                        String name = line.substring(0, colon).trim();
                        String value = line.substring(colon + 1).trim();
                        headers.put(name, value);
                    }
                });
                httpUtils.setRequestProperties(headers);
            }

            @Override
            public void onFail(IOException e) {
                // Report the failure instead of silently discarding it.
                System.err.println("Request failed: " + e);
                e.printStackTrace();
            }
        }).startConnenction("https://www.zhihu.com/", "GET");
    }
}
  • Utils封裝工具類
    package Utils;

import java.io.IOException;
import java.io.PrintWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/**
 * Small callback-driven wrapper around {@link HttpURLConnection}.
 *
 * <p>Typical use: create an instance with {@link #newInstance()}, register an
 * {@link OnVisitingListener}, then call {@link #startConnenction(String, String)}.
 * Headers and the request body are configured from
 * {@link OnVisitingListener#onSetDetails}, which runs before the connection is
 * opened.
 *
 * <p>Instances hold a single mutable connection. The connection is not
 * disconnected by this class.
 *
 * Created by admin on 2016/3/2.
 */
public class HttpUtils {
    private HttpURLConnection conn;

    OnVisitingListener listener;

    /** Callbacks invoked while a request is prepared and executed. */
    public interface OnVisitingListener {
        /** Called when the server answered with HTTP 200. */
        void onSuccess(HttpURLConnection conn);

        /** Called before connecting so headers and the body can be set. */
        void onSetDetails(HttpURLConnection conn, HttpUtils httpUtils);

        /** Called on an I/O error or any response code other than 200. */
        void onFail(IOException e);
    }

    /** Creates a new, unconfigured instance. */
    public static HttpUtils newInstance() {
        return new HttpUtils();
    }

    /**
     * Registers the listener that receives the request callbacks.
     *
     * @return this instance, for call chaining
     */
    public HttpUtils setOnVisitingListener(OnVisitingListener listener) {
        this.listener = listener;
        return this;
    }

    /**
     * Opens a connection to {@code fileUrl}, applies the 5 second connect and
     * read timeouts, lets the listener configure the request and connects.
     *
     * @param fileUrl absolute URL to request
     * @param method  HTTP method, e.g. {@code "GET"} or {@code "POST"}
     * @throws IOException           if the URL is malformed or connecting fails
     * @throws IllegalStateException if no listener has been registered
     */
    public void setConnection(String fileUrl, String method) throws IOException {
        requireListener();
        URL url = new URL(fileUrl);
        conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod(method);
        conn.setConnectTimeout(5000);
        conn.setReadTimeout(5000);
        listener.onSetDetails(conn, this);
        conn.connect();
    }

    /**
     * Executes the request and reports the outcome to the listener:
     * {@code onSuccess} for HTTP 200, {@code onFail} for any other response
     * code or for an I/O error.
     *
     * @throws IllegalStateException if no listener has been registered
     */
    public void startConnenction(String url, String method) {
        requireListener();
        int code;
        try {
            setConnection(url, method);
            code = conn.getResponseCode();
        } catch (IOException e) {
            listener.onFail(e);
            return;
        }
        if (code == HttpURLConnection.HTTP_OK) {
            listener.onSuccess(conn);
        } else {
            // Carry the status code so the failure can be diagnosed.
            listener.onFail(new IOException(
                    "Unexpected HTTP response code " + code + " for " + url));
        }
    }

    /**
     * Sets every entry of {@code map} as a request header.
     *
     * @throws IllegalStateException if no connection has been created yet
     */
    public void setRequestProperties(Map<String, String> map) {
        requireConnection();
        for (Map.Entry<String, String> header : map.entrySet()) {
            conn.setRequestProperty(header.getKey(), header.getValue());
        }
    }

    /**
     * Writes {@code body} (UTF-8 encoded) as the request body.
     *
     * <p>Obtaining the output stream opens the connection, so call this after
     * all headers have been set. I/O errors are printed, not thrown.
     *
     * @throws IllegalStateException if no connection has been created yet
     */
    public void setRequestBody(String body) {
        requireConnection();
        // Without doOutput the JDK refuses to hand out an output stream
        // (ProtocolException), so the body would never be sent.
        conn.setDoOutput(true);
        try (java.io.OutputStream out = conn.getOutputStream()) {
            out.write(body.getBytes("UTF-8"));
            out.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Sets a single request header.
     *
     * @throws IllegalStateException if no connection has been created yet
     */
    public void setRequestProperty(String type, String value) {
        requireConnection();
        conn.setRequestProperty(type, value);
    }

    /** Fails fast with a clear message instead of a bare NullPointerException. */
    private void requireListener() {
        if (listener == null) {
            throw new IllegalStateException(
                    "No OnVisitingListener registered; call setOnVisitingListener first");
        }
    }

    /** Guards the header/body setters against use before a connection exists. */
    private void requireConnection() {
        if (conn == null) {
            throw new IllegalStateException(
                    "No connection yet; configure the request from onSetDetails");
        }
    }
}

package Utils;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regular-expression convenience helpers.
 *
 * <p>The single-result lookups return the literal string {@code "Nothing!"}
 * when the pattern does not match anywhere in the target.
 */
public class RegexUtils {
    /** Sentinel returned by the single-result lookups when nothing matches. */
    private static final String NO_MATCH = "Nothing!";

    /**
     * Returns capture group {@code which} of the first match of
     * {@code patternStr} in {@code targetStr}.
     *
     * @param targetStr  text to search
     * @param patternStr regular expression
     * @param which      capture group index (0 is the whole match)
     * @return the group text, or {@code "Nothing!"} if there is no match
     */
    public static String RegexGroup(String targetStr, String patternStr, int which) {
        Matcher matcher = Pattern.compile(patternStr).matcher(targetStr);
        if (matcher.find()) {
            return matcher.group(which);
        }
        return NO_MATCH;
    }

    /**
     * Collects capture group {@code which} of every match of
     * {@code patternStr} in {@code targetStr}, in order of appearance.
     *
     * @return the group texts; empty if there is no match
     */
    public static List<String> RegexGroups(String targetStr, String patternStr, int which) {
        Matcher matcher = Pattern.compile(patternStr).matcher(targetStr);
        List<String> list = new ArrayList<String>();
        while (matcher.find()) {
            list.add(matcher.group(which));
        }
        return list;
    }

    /**
     * Returns the first substring of {@code targetStr} matched by
     * {@code patternStr}.
     *
     * @return the matched text, or {@code "Nothing!"} if there is no match
     */
    public static String RegexString(String targetStr, String patternStr) {
        Matcher matcher = Pattern.compile(patternStr).matcher(targetStr);
        if (matcher.find()) {
            return matcher.group();
        }
        return NO_MATCH;
    }
}

package Utils;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;

/**
 * Static helpers for creating readers/writers from loosely typed sources and
 * for reading or writing whole strings.
 *
 * <p>Methods that take an {@code Object} dispatch on its runtime type; the
 * accepted types are listed on each method.
 *
 * Created by admin on 2016/2/18.
 */
public class StreamUtils {

    /**
     * Reads {@code inputStream} to the end, decodes the bytes with the
     * platform default charset and closes the stream.
     *
     * @throws IOException if reading fails (the stream is still closed)
     */
    public static String readFromStream(InputStream inputStream) throws IOException {
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        byte[] buffer = new byte[1024];
        try {
            int len;
            while ((len = inputStream.read(buffer)) != -1) {
                outputStream.write(buffer, 0, len);
            }
        } finally {
            // Close on failure too, so the stream never leaks.
            inputStream.close();
        }
        return outputStream.toString();
    }

    /** Opens {@code file} with a {@link FileReader} (platform default charset). */
    public static FileReader createFileReader(File file) throws FileNotFoundException {
        return new FileReader(file);
    }

    /** Opens {@code file} with a {@link FileWriter}, truncating existing content. */
    public static FileWriter createFileWriter(File file) throws IOException {
        return new FileWriter(file);
    }

    /**
     * Opens a UTF-8 reader on a file. If the file does not exist its parent
     * directories are created first.
     *
     * @param obj must be a {@link File}
     * @return the reader, or {@code null} if the file cannot be opened (the
     *         exception is printed)
     */
    public static InputStreamReader createInputStreamReader(Object obj) {
        File file = (File) obj;
        ensureParentDirectory(file);
        try {
            return new InputStreamReader(new FileInputStream(file), "utf-8");
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Opens a UTF-8 writer on a file in append mode. If the file does not
     * exist its parent directories are created first.
     *
     * @param obj must be a {@link File}
     * @return the writer, or {@code null} if the file cannot be opened (the
     *         exception is printed)
     */
    public static OutputStreamWriter createOutputStreamWriter(Object obj) {
        File file = (File) obj;
        ensureParentDirectory(file);
        try {
            return new OutputStreamWriter(new FileOutputStream(file, true), "utf-8");
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Creates a {@link BufferedReader} for the given source.
     *
     * <ul>
     * <li>{@code BufferedReader}: returned as is</li>
     * <li>{@code Reader}: wrapped</li>
     * <li>{@code String}: treated as a file path, read as UTF-8</li>
     * <li>{@code InputStream}: decoded with {@code cd}, or the platform default
     *     charset when {@code cd} is {@code null}</li>
     * <li>{@code File}: created if missing, read with the platform default charset</li>
     * </ul>
     *
     * @param cd charset name, used for {@code InputStream} sources only
     * @return the reader, or {@code null} for an unsupported source type
     * @throws IOException if the source cannot be opened
     */
    public static BufferedReader createBufferedReader(Object obj, String cd) throws IOException {
        // Most specific type first: a BufferedReader is also a Reader.
        if (obj instanceof BufferedReader) {
            return (BufferedReader) obj;
        }
        if (obj instanceof Reader) {
            return new BufferedReader((Reader) obj);
        }
        if (obj instanceof String) {
            File file = new File((String) obj);
            ensureParentDirectory(file);
            // Opened directly so a missing file surfaces as FileNotFoundException
            // rather than a NullPointerException from wrapping a null reader.
            return new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));
        }
        if (obj instanceof InputStream) {
            InputStream in = (InputStream) obj;
            if (cd == null) {
                return new BufferedReader(new InputStreamReader(in));
            }
            return new BufferedReader(new InputStreamReader(in, cd));
        }
        if (obj instanceof File) {
            File file = (File) obj;
            if (!file.exists()) {
                file.createNewFile();
            }
            return new BufferedReader(createFileReader(file));
        }
        return null;
    }

    /**
     * Creates a {@link BufferedWriter} for the given target.
     *
     * <ul>
     * <li>{@code BufferedWriter}: returned as is</li>
     * <li>{@code Writer}: wrapped</li>
     * <li>{@code String} (file path) or {@code File}: UTF-8, append mode</li>
     * <li>{@code OutputStream}: UTF-8</li>
     * </ul>
     *
     * @return the writer, or {@code null} for an unsupported target type
     * @throws IOException if the target cannot be opened
     */
    public static BufferedWriter createBufferedWriter(Object obj) throws IOException {
        // Most specific type first: a BufferedWriter is also a Writer.
        if (obj instanceof BufferedWriter) {
            return (BufferedWriter) obj;
        }
        if (obj instanceof Writer) {
            return new BufferedWriter((Writer) obj);
        }
        if (obj instanceof String) {
            return new BufferedWriter(openUtf8Appender(new File((String) obj)));
        }
        if (obj instanceof File) {
            return new BufferedWriter(openUtf8Appender((File) obj));
        }
        if (obj instanceof OutputStream) {
            return new BufferedWriter(new OutputStreamWriter((OutputStream) obj, "utf-8"));
        }
        return null;
    }

    /** Receives the lines produced by {@link #getString(Object, OnGetStringListener)}. */
    public interface OnGetStringListener {
        /** Called once per line, without the line terminator. */
        void onGetString(String line);

        /** Called once after the last line has been delivered. */
        void onGeted();
    }

    /**
     * Reads the source line by line, passing each line to {@code listener},
     * then closes the reader. I/O errors are printed, not thrown.
     *
     * @param obj any source accepted by {@link #createBufferedReader}
     */
    public static void getString(Object obj, OnGetStringListener listener) {
        try (BufferedReader br = createBufferedReader(obj, null)) {
            if (br == null) {
                return;
            }
            String line;
            while ((line = br.readLine()) != null) {
                listener.onGetString(line);
            }
            listener.onGeted();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the whole source and returns it with every line terminated by
     * {@code "\n"}; {@code InputStream} sources are decoded as UTF-8. The
     * reader is closed afterwards. I/O errors are printed and whatever was
     * read so far is returned.
     *
     * @param obj any source accepted by {@link #createBufferedReader}
     */
    public static String getString(Object obj) {
        StringBuilder text = new StringBuilder();
        try (BufferedReader br = createBufferedReader(obj, "utf-8")) {
            if (br != null) {
                String line;
                while ((line = br.readLine()) != null) {
                    text.append(line).append("\n");
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return text.toString();
    }

    /**
     * Writes {@code str} to the target and closes the writer. I/O errors are
     * printed, not thrown.
     *
     * @param obj any target accepted by {@link #createBufferedWriter}
     */
    public static void writeString(Object obj, String str) {
        try (BufferedWriter bw = createBufferedWriter(obj)) {
            if (bw != null) {
                bw.write(str);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Opens {@code file} for UTF-8 appending, creating parent directories if needed. */
    private static OutputStreamWriter openUtf8Appender(File file) throws IOException {
        ensureParentDirectory(file);
        return new OutputStreamWriter(new FileOutputStream(file, true), "utf-8");
    }

    /**
     * Creates the parent directories of a file that does not exist yet.
     * A path with no parent component (e.g. "requestheader.txt") has a null
     * parent and needs no directory.
     */
    private static void ensureParentDirectory(File file) {
        if (file == null || file.exists()) {
            return;
        }
        File parent = file.getParentFile();
        if (parent != null) {
            parent.mkdirs();
        }
    }
}

發送POST請求

發送POST請求和GET沒有太大區別,只不過POST請求需要設置Request Body。在連接之前,先得到輸出流,再把Fiddler里面的Request Body數據寫入即可。


注意!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系我们删除。



 
粤ICP备14056181号  © 2014-2021 ITdaan.com