欢迎您访问程序员文章站,本站旨在为大家提供和分享程序员计算机编程知识!
您现在的位置是: 首页  >  IT编程

Java获取网络文件并插入数据库的代码

程序员文章站 2023-11-27 18:49:22
获取百度的歌曲名,歌手和链接!! 复制代码 代码如下: package webtools; import java.io.bufferedreader; import ja...
获取百度的歌曲名、歌手和链接:
复制代码 代码如下:

package webtools;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import dbtools.dbtools;

/**
 * Downloads an HTML page, pulls song entries out of its listing table with
 * regular expressions and inserts one row per entry into the {@code song}
 * table through the {@code dbtools} helper.
 *
 * <p>Names declared by this project ({@code iotoweb}, {@code dbtools} and the
 * method names) are spelled exactly as their callers spell them; JDK
 * identifiers use their real, case-sensitive names.
 */
public class iotoweb {

    /** Charset the downloaded page is decoded with. */
    private static final String PAGE_CHARSET = "gb2312";

    /** A {@code <td>} cell containing an anchor, optionally wrapped in parentheses. */
    private static final Pattern LINK_CELL = Pattern.compile(
            "<td[^>]*>[\\(]*<a[^>]*href=(\"([^\"]*)\"|\'([^\']*)\'|([^\\s>]*))[^>]*>(.*?)[\\)]*[\\s]*</td>",
            Pattern.DOTALL);

    /**
     * An {@code href=} attribute. The expression ends with a literal double
     * quote, so it is effectively tuned to double-quoted attribute values.
     */
    private static final Pattern HREF = Pattern.compile(
            "href=(\"([^\"]*)\"|\'([^\']*)\'|([^\\s>]*))\"", Pattern.DOTALL);

    /** An anchor that directly follows "(" and runs up to the next ")". */
    private static final Pattern PERSON = Pattern.compile(
            "\\(<a[^>]*href=(\"([^\"]*)\"|\'([^\']*)\'|([^\\s>]*))[^>]*>(.*?)\\)", Pattern.DOTALL);

    /** A complete anchor element that is followed by one whitespace character. */
    private static final Pattern SONG_ANCHOR = Pattern.compile(
            "<a[^>]*href=(\"([^\"]*)\"|\'([^\']*)\'|([^\\s>]*))[^>]*>(.*?)</a>\\s", Pattern.DOTALL);

    /** The listing table(s) that hold the entries. */
    private static final Pattern MAIN_TABLE = Pattern.compile(
            "<table width=\"100%\" align=\"center\" cellpadding=\"0\" cellspacing=\"0\" class=\"list\">(.*?)</table>",
            Pattern.DOTALL);

    /** Any markup tag, matched non-greedily. */
    private static final Pattern TAG = Pattern.compile("<.*?>");

    private static final String INSERT_SONG_SQL =
            "insert into song(songname,songperson,songhref) values(?,?,?)";

    /** Performer stored when a cell contains no parenthesised anchor. */
    private static final String NO_PERSON = "无";

    /** Database helper used to store the extracted rows. */
    dbtools dbtools = new dbtools();

    /**
     * Downloads the page at {@code htmlurl} and returns its text with the
     * lines concatenated (line terminators are dropped).
     *
     * <p>I/O problems, including a malformed URL, are printed to standard
     * error and whatever was read up to that point is returned, so the result
     * may be empty but is never {@code null}.
     *
     * @param htmlurl address of the page to download
     * @return the page text decoded as gb2312, possibly empty
     */
    public String gethtmlcontent(String htmlurl) {
        StringBuilder htmlcontent = new StringBuilder();
        // Both resources are declared separately so the stream is released
        // even if creating the reader fails.
        try (InputStream stream = new URL(htmlurl).openStream();
                BufferedReader in = new BufferedReader(new InputStreamReader(stream, PAGE_CHARSET))) {
            String rowcontent;
            while ((rowcontent = in.readLine()) != null) {
                htmlcontent.append(rowcontent);
            }
        } catch (IOException e) {
            // MalformedURLException and UnsupportedEncodingException are
            // IOExceptions too; all are reported the same way.
            e.printStackTrace();
        }
        return htmlcontent.toString();
    }

    /**
     * Returns every table cell of {@code htmlcontent} that contains an anchor.
     *
     * @param htmlcontent HTML fragment to search
     * @return the matched cells in document order, possibly empty
     */
    public List<String> getlink(String htmlcontent) {
        return findAll(LINK_CELL, htmlcontent);
    }

    /**
     * Returns the values of the {@code href} attributes found in
     * {@code htmlcontent}, with the {@code href="} prefix and all double
     * quotes removed.
     *
     * @param htmlcontent HTML fragment to search
     * @return the link targets in document order, possibly empty
     */
    public List<String> gethref(String htmlcontent) {
        return stripHrefPrefixAndQuotes(findAll(HREF, htmlcontent));
    }

    /**
     * Returns the parenthesised anchors found in {@code htmlcontent}, with the
     * first {@code href="} and all double quotes removed. The remaining markup
     * is left in place; callers strip it with {@link #outtag(String)}.
     *
     * @param htmlcontent HTML fragment to search
     * @return the matches in document order, possibly empty
     */
    public List<String> getperson(String htmlcontent) {
        return stripHrefPrefixAndQuotes(findAll(PERSON, htmlcontent));
    }

    /**
     * Returns the anchor elements of {@code htmlcontent} that are followed by
     * whitespace, markup included.
     *
     * @param htmlcontent HTML fragment to search
     * @return the matched anchors in document order, possibly empty
     */
    public List<String> getsongname(String htmlcontent) {
        return findAll(SONG_ANCHOR, htmlcontent);
    }

    /**
     * Returns the listing table(s) of the page, concatenated when the page
     * contains more than one.
     *
     * @param htmlcontent full page text
     * @return the matched table markup, or an empty string when none matches
     */
    public String getmaincontent(String htmlcontent) {
        StringBuilder maincontent = new StringBuilder();
        for (String table : findAll(MAIN_TABLE, htmlcontent)) {
            maincontent.append(table);
        }
        return maincontent.toString();
    }

    /**
     * Removes everything that looks like a markup tag from {@code s}.
     *
     * @param s text that may contain tags
     * @return {@code s} without its tags
     */
    public String outtag(final String s) {
        return TAG.matcher(s).replaceAll("");
    }

    /**
     * Downloads {@code htmlurl}, extracts the song name, performer and link of
     * every entry in the listing table and inserts them into the {@code song}
     * table.
     *
     * <p>A cell in which no song anchor or no link can be found is skipped, so
     * one irregular cell no longer aborts the whole import. When a cell has no
     * performer, {@code "无"} is stored; when several performer anchors match,
     * the last one is used.
     *
     * @param htmlurl address of the listing page
     * @throws Throwable whatever {@code dbtools.update} throws
     */
    public void getfrombaidumap3(String htmlurl) throws Throwable {
        String maincontent = getmaincontent(gethtmlcontent(htmlurl));
        for (String tdtag : getlink(maincontent)) {
            List<String> songnamelist = getsongname(tdtag);
            if (songnamelist.isEmpty()) {
                continue;
            }
            String songanchor = songnamelist.get(0);
            List<String> hreflist = gethref(songanchor);
            if (hreflist.isEmpty()) {
                continue;
            }

            List<String> personlist = getperson(tdtag);
            String songperson = personlist.isEmpty()
                    ? NO_PERSON
                    : outtag(personlist.get(personlist.size() - 1));

            ArrayList<Object> list_values = new ArrayList<>();
            list_values.add(outtag(songanchor));
            list_values.add(songperson);
            list_values.add(hreflist.get(0));
            dbtools.update(INSERT_SONG_SQL, list_values);
        }
    }

    /** Collects the full text of every match of {@code pattern} in {@code text}. */
    private static List<String> findAll(Pattern pattern, String text) {
        List<String> matches = new ArrayList<>();
        Matcher matcher = pattern.matcher(text);
        while (matcher.find()) {
            matches.add(matcher.group());
        }
        return matches;
    }

    /** Removes the first {@code href="} and every double quote from each element. */
    private static List<String> stripHrefPrefixAndQuotes(List<String> matches) {
        List<String> cleaned = new ArrayList<>(matches.size());
        for (String match : matches) {
            cleaned.add(match.replaceFirst("href=\"", "").replace("\"", ""));
        }
        return cleaned;
    }
}

dbtools 数据库连接类:
复制代码 代码如下:

package dbtools;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;

/**
 * Minimal JDBC helper that opens one MySQL connection when it is constructed
 * and runs parameterised statements on it.
 *
 * <p>NOTE(review): the connection is never closed by this class, and the JDBC
 * URL, user and password are hard-coded below; both should be revisited before
 * this is used outside a local experiment.
 */
public class dbtools {

    /** Driver class name; {@code Class.forName} is case-sensitive. */
    private static final String DRIVER_CLASS = "com.mysql.jdbc.Driver";
    private static final String JDBC_URL = "jdbc:mysql://localhost:3306/testurl";
    private static final String DB_USER = "root";
    private static final String DB_PASSWORD = "zhuyi";

    /** The connection, or {@code null} when it could not be opened. */
    private final Connection connection;

    /** Why the connection could not be opened, or {@code null}. */
    private final SQLException connectionFailure;

    /**
     * Loads the MySQL driver and opens the connection.
     *
     * <p>The constructor never throws: failures are printed to standard error
     * and remembered, and the first call to {@link #query} or {@link #update}
     * then fails with an {@link IllegalStateException}.
     */
    public dbtools() {
        try {
            Class.forName(DRIVER_CLASS);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }

        Connection opened = null;
        SQLException failure = null;
        try {
            opened = DriverManager.getConnection(JDBC_URL, DB_USER, DB_PASSWORD);
        } catch (SQLException e) {
            e.printStackTrace();
            failure = e;
        }
        connection = opened;
        connectionFailure = failure;
    }

    /**
     * Runs a parameterised query and returns all rows.
     *
     * @param sql         statement text with {@code ?} placeholders
     * @param list_values values bound to the placeholders in order;
     *                    {@code null} means "no parameters"
     * @return one {@code String[]} per row, holding every column read with
     *         {@code getString}
     * @throws IllegalStateException if no connection could be opened
     * @throws Throwable             any {@link SQLException} raised by the driver
     */
    public ArrayList<String[]> query(String sql, ArrayList<?> list_values) throws Throwable {
        ArrayList<String[]> listrows = new ArrayList<>();
        try (PreparedStatement statement = requireConnection().prepareStatement(sql)) {
            bind(statement, list_values);
            try (ResultSet rows = statement.executeQuery()) {
                int columncount = rows.getMetaData().getColumnCount();
                while (rows.next()) {
                    String[] rowinfo = new String[columncount];
                    for (int column = 0; column < columncount; column++) {
                        rowinfo[column] = rows.getString(column + 1); // JDBC columns are 1-based
                    }
                    listrows.add(rowinfo);
                }
            }
        }
        return listrows;
    }

    /**
     * Runs a parameterised INSERT, UPDATE or DELETE.
     *
     * @param sql         statement text with {@code ?} placeholders
     * @param list_values values bound to the placeholders in order;
     *                    {@code null} means "no parameters"
     * @throws IllegalStateException if no connection could be opened
     * @throws Throwable             any {@link SQLException} raised by the driver
     */
    public void update(String sql, ArrayList<?> list_values) throws Throwable {
        try (PreparedStatement statement = requireConnection().prepareStatement(sql)) {
            bind(statement, list_values);
            statement.executeUpdate();
        }
    }

    /** Returns the connection or fails with the reason it could not be opened. */
    private Connection requireConnection() {
        if (connection == null) {
            throw new IllegalStateException("No database connection to " + JDBC_URL, connectionFailure);
        }
        return connection;
    }

    /** Binds {@code values} to the statement's placeholders, first value to index 1. */
    private static void bind(PreparedStatement statement, ArrayList<?> values) throws SQLException {
        if (values == null) {
            return;
        }
        for (int index = 0; index < values.size(); index++) {
            statement.setObject(index + 1, values.get(index));
        }
    }
}

Servlet 调用:
复制代码 代码如下:

package controller;
import java.io.ioexception;
import java.io.printwriter;
import java.util.list;
import javax.servlet.servletexception;
import javax.servlet.http.httpservlet;
import javax.servlet.http.httpservletrequest;
import javax.servlet.http.httpservletresponse;
import webtools.iotoweb;
public class testurl extends httpservlet {
/**
* constructor of the object.
*/
public testurl() {
super();
}
/**
* destruction of the servlet. <br>
*/
public void destroy() {
super.destroy(); // just puts "destroy" string in log
// put your code here
}
/**
* the doget method of the servlet. <br>
*
* this method is called when a form has its tag value method equals to get.
*
* @param request
* the request send by the client to the server
* @param response
* the response send by the server to the client
* @throws servletexception
* if an error occurred
* @throws ioexception
* if an error occurred
*/
public void doget(httpservletrequest request, httpservletresponse response)
throws servletexception, ioexception {
try {
iotoweb iotoweb = new iotoweb();
iotoweb.getfrombaidumap3("http://list.mp3.baidu.com/topso/mp3topsong.html?id=1?top2");
} catch (throwable e) {
// todo auto-generated catch block
e.printstacktrace();
}
}
/**
* the dopost method of the servlet. <br>
*
* this method is called when a form has its tag value method equals to
* post.
*
* @param request
* the request send by the client to the server
* @param response
* the response send by the server to the client
* @throws servletexception
* if an error occurred
* @throws ioexception
* if an error occurred
*/
public void dopost(httpservletrequest request, httpservletresponse response)
throws servletexception, ioexception {
response.setcontenttype("text/html");
printwriter out = response.getwriter();
out
.println("<!doctype html public \"-//w3c//dtd html 4.01 transitional//en\">");
out.println("<html>");
out.println(" <head><title>a servlet</title></head>");
out.println(" <body>");
out.print(" this is ");
out.print(this.getclass());
out.println(", using the post method");
out.println(" </body>");
out.println("</html>");
out.flush();
out.close();
}
/**
* initialization of the servlet. <br>
*
* @throws servletexception
* if an error occurs
*/
public void init() throws servletexception {
// put your code here
}
}

获取金书网的图书名:
复制代码 代码如下:

package webtools;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import dbtools.dbtools;

/**
 * Downloads a book page, extracts the book title from its
 * {@code <span class="style15">} element and inserts it into the
 * {@code bookinfo} table through the {@code dbtools} helper.
 *
 * <p>Names declared by this project are spelled exactly as their callers
 * spell them; JDK identifiers use their real, case-sensitive names.
 */
public class getbook {

    /** Charset the downloaded page is decoded with. */
    private static final String PAGE_CHARSET = "gb2312";

    /** The span that carries the title; its content must not contain {@code >}. */
    private static final Pattern BOOK_NAME_SPAN =
            Pattern.compile("<span class=\"style15\">[^>]*</span>", Pattern.DOTALL);

    /** Any markup tag, matched non-greedily. */
    private static final Pattern TAG = Pattern.compile("<.*?>");

    private static final String INSERT_BOOK_SQL = "insert into bookinfo(bookname) values(?)";

    /** Database helper used to store the extracted titles. */
    dbtools dbtools = new dbtools();

    /**
     * Downloads the page at {@code htmlurl} and returns its text with the
     * lines concatenated (line terminators are dropped).
     *
     * @param htmlurl address of the page to download
     * @return the page text decoded as gb2312
     * @throws Throwable any I/O failure, e.g. a malformed URL or a page that
     *                   cannot be opened; the stream is closed in every case
     */
    public String gethtmlcontent(String htmlurl) throws Throwable {
        StringBuilder htmlcontent = new StringBuilder();
        // Both resources are declared separately so the stream is released
        // even if creating the reader fails.
        try (InputStream stream = new URL(htmlurl).openStream();
                BufferedReader in = new BufferedReader(new InputStreamReader(stream, PAGE_CHARSET))) {
            String rowcontent;
            while ((rowcontent = in.readLine()) != null) {
                htmlcontent.append(rowcontent);
            }
        }
        return htmlcontent.toString();
    }

    /**
     * Returns the first title span of the page, markup included.
     *
     * @param htmlcontent full page text
     * @return the matched span, or an empty string when the page has none
     */
    public String getbookname(String htmlcontent) {
        Matcher matcher = BOOK_NAME_SPAN.matcher(htmlcontent);
        return matcher.find() ? matcher.group() : "";
    }

    /**
     * Removes everything that looks like a markup tag from {@code s}.
     *
     * @param s text that may contain tags
     * @return {@code s} without its tags
     */
    public String outtag(final String s) {
        return TAG.matcher(s).replaceAll("");
    }

    /**
     * Downloads {@code htmlurl} and, when a non-empty title is found, prints
     * it to standard output and inserts it into the {@code bookinfo} table.
     *
     * @param htmlurl address of the book page
     * @throws Throwable any download failure or whatever {@code dbtools.update}
     *                   throws
     */
    public void getfromjinshu(String htmlurl) throws Throwable {
        String bookname = outtag(getbookname(gethtmlcontent(htmlurl)));
        if (bookname.isEmpty()) {
            return;
        }
        System.out.println(bookname);
        ArrayList<Object> list_values = new ArrayList<>();
        list_values.add(bookname);
        dbtools.update(INSERT_BOOK_SQL, list_values);
    }
}

调用 Servlet:
复制代码 代码如下:

package controller;
import java.io.ioexception;
import java.io.printwriter;
import javax.servlet.servletexception;
import javax.servlet.http.httpservlet;
import javax.servlet.http.httpservletrequest;
import javax.servlet.http.httpservletresponse;
import webtools.getbook;
public class testbook extends httpservlet {
/**
* constructor of the object.
*/
public testbook() {
super();
}
/**
* destruction of the servlet. <br>
*/
public void destroy() {
super.destroy(); // just puts "destroy" string in log
// put your code here
}
/**
* the doget method of the servlet. <br>
*
* this method is called when a form has its tag value method equals to get.
*
* @param request
* the request send by the client to the server
* @param response
* the response send by the server to the client
* @throws servletexception
* if an error occurred
* @throws ioexception
* if an error occurred
*/
int i = 1;
public void doget(httpservletrequest request, httpservletresponse response)
throws servletexception, ioexception {
getbook bookinfo = new getbook();
for (; i < 10000; i++) {
string bookurl = "http://www.golden-book.com/booksinfo/12/" + i
+ ".html";
try {
bookinfo.getfromjinshu(bookurl);
} catch (throwable e) {
i++;
dopost(request, response);
}
}
}
/**
* the dopost method of the servlet. <br>
*
* this method is called when a form has its tag value method equals to
* post.
*
* @param request
* the request send by the client to the server
* @param response
* the response send by the server to the client
* @throws servletexception
* if an error occurred
* @throws ioexception
* if an error occurred
*/
public void dopost(httpservletrequest request, httpservletresponse response)
throws servletexception, ioexception {
getbook bookinfo = new getbook();
for (; i < 10000; i++) {
string bookurl = "http://www.golden-book.com/booksinfo/12/" + i
+ ".html";
try {
bookinfo.getfromjinshu(bookurl);
} catch (throwable e) {
i++;
doget(request, response);
}
}
}
/**
* initialization of the servlet. <br>
*
* @throws servletexception
* if an error occurs
*/
public void init() throws servletexception {
// put your code here
}
}

每种功能的实现方法有很多,希望各位可以交流不同的思路和方法。可以加 QQ:412546724,呵呵。