欢迎您访问程序员文章站,本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页  >  IT编程

Java 实现通过 POST 方式提交 JSON 参数操作

程序员文章站 2022-07-04 19:59:32
由于所爬取的网站需要验证码,通过网页的开发人员工具【f12】及在线http post,get接口测试请求工具(http://coolaf.com/)发现访问时加上请求头header 信息时可以跳过验证...

由于所爬取的网站需要验证码,通过浏览器的开发人员工具【F12】及在线 HTTP POST/GET 接口测试工具(http://coolaf.com/)发现,访问时加上请求头(header)信息即可跳过验证码校验。

而且该网站只接受 POST 请求,对提交的参数也只接受 JSON 格式,否则请求失败。

现将通过 POST 方式提交 JSON 参数的方法记录如下:

import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

/**
 * Demonstrates submitting JSON parameters to a web service with an HTTP POST request.
 *
 * <p>The request carries browser-like headers; the accompanying article reports that
 * the target site skips its captcha check when they are present.</p>
 *
 * <p>NOTE(review): library identifiers were restored to their documented casing. The
 * class name, the URL and the JSON keys are kept exactly as they appear in the article
 * because their original spelling cannot be confirmed from it.</p>
 *
 * @version 1.0
 * @author zxk
 * @date 2018-3-3
 */
public class postjsonparamstest {

  // Timeout in milliseconds, applied to socket reads, connects and pool leases.
  private static final int RUN_TIME = 10000;

  // Consecutive failed attempts tolerated for one page before the crawl gives up.
  private static final int MAX_RETRIES = 3;

  // Page number at which this demo stops; the crawl starts at page 1, so exactly
  // one page is fetched. Presumably the full data set ends near page 2713 (the
  // article had that value in a disabled check) - confirm before raising it.
  private static final String STOP_PAGE = "2";

  // Outcome markers returned by crawlList.
  private static final String RESULT_CRAWLED = "爬取";
  private static final String RESULT_RETRY = "重跑";

  // Page number currently being crawled, kept as text because it is spliced into the JSON body.
  private String page;

  /**
   * Crawls result pages starting at page 1 until {@link #STOP_PAGE} is reached or a
   * page fails {@link #MAX_RETRIES} times in a row.
   *
   * @param args unused
   * @throws Exception if the HTTP client cannot be closed or the page counter is not numeric
   */
  public static void main(String[] args) throws Exception {
    postjsonparamstest crawl = new postjsonparamstest();

    // Target endpoint.
    String url = "http://www.gzcredit.gov.cn/service/creditservice.asmx/searchorgwithpage";
    // First page to request.
    crawl.setPage("1");

    HttpPost request = new HttpPost(url);

    RequestConfig requestConfig = RequestConfig.custom()
        .setSocketTimeout(RUN_TIME)
        .setConnectTimeout(RUN_TIME)
        .setConnectionRequestTimeout(RUN_TIME)
        .build();
    request.setConfig(requestConfig);

    // Browser-like headers; per the article these let the request bypass the captcha.
    request.addHeader("accept", "application/json, text/javascript, */*");
    request.addHeader("accept-encoding", "gzip, deflate");
    request.addHeader("accept-language", "zh-cn,zh;q=0.8");
    request.addHeader("connection", "keep-alive");
    request.addHeader("host", "www.gzcredit.gov.cn");
    request.addHeader("content-type", "application/json; charset=utf-8");

    int failures = 0;
    // One client for the whole crawl, closed automatically when the loop ends.
    try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
      while (failures < MAX_RETRIES) {
        // Rebuild the body for every attempt so it always carries the current page.
        String postParams = buildPostParams(crawl.getPage());
        System.out.println(postParams);
        // Encode explicitly as UTF-8 to match the declared content-type charset.
        request.setEntity(new StringEntity(postParams, StandardCharsets.UTF_8));

        String outcome = crawl.crawlList(httpClient, request);
        if (RESULT_CRAWLED.equals(outcome)) {
          failures = 0;
          crawl.setPage(nextPage(crawl.getPage()));
        } else {
          failures++;
        }

        if (STOP_PAGE.equals(crawl.getPage())) {
          break;
        }
      }
    }

    if (failures >= MAX_RETRIES) {
      System.err.println("Giving up on page " + crawl.getPage()
          + " after " + MAX_RETRIES + " failed attempts");
    }
  }

  /**
   * Builds the JSON request body for one result page.
   *
   * @param pageNo page number to request, inserted verbatim as a JSON number
   * @return the JSON document to send as the POST body
   */
  private static String buildPostParams(String pageNo) {
    // "count" is quoted like every other key; an unquoted key is not valid JSON.
    return "{\"condition\":{\"qymc\":\"%%%%\",\"cydw\":\"\"},\"pageno\":" + pageNo
        + ",\"pagesize\":100,\"count\":2709846}";
  }

  /**
   * Returns the page number following {@code current}.
   *
   * @param current current page number as decimal text
   * @return {@code current + 1} as decimal text
   * @throws NumberFormatException if {@code current} is not a valid integer
   */
  private static String nextPage(String current) {
    try {
      return String.valueOf(Integer.parseInt(current) + 1);
    } catch (NumberFormatException e) {
      // NumberFormatException has no cause-taking constructor, so attach it explicitly.
      NumberFormatException wrapped = new NumberFormatException("数字格式错误");
      wrapped.initCause(e);
      throw wrapped;
    }
  }

  /**
   * Executes the search request and prints the returned organisation list.
   *
   * @param httpClient client used to execute the request; not closed by this method
   * @param request fully prepared request (URL, headers and body already set)
   * @return {@code "爬取"} when a non-empty list was received and printed,
   *         {@code "重跑"} when the attempt failed and should be repeated
   */
  private String crawlList(CloseableHttpClient httpClient, HttpRequestBase request) {
    try (CloseableHttpResponse response = httpClient.execute(request)) {
      HttpEntity httpEntity = response.getEntity();
      try {
        if (response.getStatusLine().getStatusCode() != 200 || httpEntity == null) {
          return RESULT_RETRY;
        }

        String searchListStr = EntityUtils.toString(httpEntity, "gbk").replaceAll("\\\\米", "米");
        // The payload of interest is itself a JSON document stored as text under "d".
        String allData = JSONObject.parseObject(searchListStr).getString("d");
        if (allData == null) {
          return RESULT_RETRY;
        }

        // Values may contain bare double quotes. Structural quotes are first swapped
        // for single quotes, the remaining (embedded) double quotes are dropped, and
        // the structural ones are restored. The order of these steps matters.
        String cleaned = allData.replaceAll("\\{\"", "{'")
            .replaceAll("\":\"", "':'")
            .replaceAll("\",\"", "','")
            .replaceAll("\":", "':")
            .replaceAll(",\"", ",'")
            .replaceAll("\"\\}", "'}")
            .replaceAll("\"", "")
            .replaceAll("'", "\"")
            .replaceAll("<br />", "")
            .replaceAll("\t", "")
            .replaceAll("\\\\", "?");
        JSONArray jsonContent = JSONObject.parseObject(cleaned).getJSONArray("orglist");

        if (jsonContent == null || jsonContent.size() < 1) {
          return RESULT_RETRY;
        }
        System.out.println(jsonContent.toJSONString());
        return RESULT_CRAWLED;
      } finally {
        // Drain whatever is left so the connection can be reused for the next page.
        EntityUtils.consumeQuietly(httpEntity);
      }
    } catch (Exception e) {
      // Any transport or parse failure is reported and turned into a retry.
      e.printStackTrace();
      return RESULT_RETRY;
    }
  }

  private String getPage() {
    return page;
  }

  private void setPage(String page) {
    this.page = page;
  }

}

补充知识:Java 利用 HttpClient 发送 POST 请求,将请求数据放到 body 里

我就废话不多说了,大家还是直接看代码吧~

  /**
   * Sends a POST request whose payload is carried in the request body and returns
   * the response content.
   *
   * <p>The response is drained and closed before the client is closed, and both are
   * released even when the request or the response handling fails.</p>
   *
   * <p>NOTE(review): {@code gethttppost}, {@code gethttpclient}, {@code getresult} and
   * {@code encoding} are defined outside this snippet; their spelling is kept exactly
   * as shown and should be confirmed against the enclosing class.</p>
   *
   * @param url  request address
   * @param bodydata payload placed in the request body
   * @return whatever {@code getresult} produces for the response entity
   * @throws Exception if building, executing or reading the request fails
   * @author wangyj
   * @date 2019-04-20
   */
  public static String dopostbodydata(String url, String bodydata) throws Exception {
    HttpPost httpPost = gethttppost(url, null); // request address
    httpPost.setEntity(new StringEntity(bodydata, encoding));

    // Resources are closed in reverse order: the response first, then the client.
    try (CloseableHttpClient httpClient = gethttpclient();
         CloseableHttpResponse response = httpClient.execute(httpPost)) {
      HttpEntity entity = response.getEntity();
      try {
        return getresult(entity, encoding);
      } finally {
        // Drain any unread content without masking an exception from getresult.
        EntityUtils.consumeQuietly(entity);
      }
    }
  }

以上这篇《Java 实现通过 POST 方式提交 JSON 参数操作》就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持。