| |
14. 37. 5. 寻找感兴趣的内容 |
|
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
/**
 * Downloads a web page, parses it into a Swing {@link HTMLDocument}, and prints
 * every anchor ({@code <a>}) element as {@code href – link text}, one per line.
 */
public class MainClass {
    /** Page whose anchors are listed. */
    private static final String PAGE_URL = "http://www.google.com";

    /** Charset used when the response does not declare a usable one. */
    private static final String FALLBACK_CHARSET = "ISO-8859-1";

    /**
     * Fetches {@link #PAGE_URL}, loads it into an {@link HTMLDocument}, and writes
     * each anchor's HREF attribute and enclosed text to standard output.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the page cannot be fetched or parsed, or if a text
     *                   range reported by the iterator cannot be read
     */
    public static void main(String args[]) throws Exception {
        URLConnection connection = new URL(PAGE_URL).openConnection();

        HTMLEditorKit htmlKit = new HTMLEditorKit();
        HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
        HTMLEditorKit.Parser parser = new ParserDelegator();
        HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);

        // Close the network stream even when parsing fails, and decode with the
        // charset announced by the server rather than the platform default.
        try (InputStream is = connection.getInputStream();
             BufferedReader br = new BufferedReader(
                     new InputStreamReader(is, charsetOf(connection.getContentType())))) {
            // ignoreCharSet = true: charset directives inside the page are not
            // acted upon; the reader's decoding stays in effect.
            parser.parse(br, callback, true);
        }
        // The reader batches its changes; flush() must be the last call on it so
        // that content still pending after the parse reaches the document.
        callback.flush();

        for (HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A);
                iterator.isValid();
                iterator.next()) {
            AttributeSet attributes = iterator.getAttributes();
            // Anchors without an HREF attribute yield null, which prints as "null".
            String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
            System.out.print(srcString);

            int startOffset = iterator.getStartOffset();
            int endOffset = iterator.getEndOffset();
            String text = htmlDoc.getText(startOffset, endOffset - startOffset);
            System.out.println(" – " + text);
        }
    }

    /**
     * Extracts the {@code charset} parameter from a Content-Type header value.
     *
     * @param contentType header value such as {@code text/html; charset=UTF-8};
     *                    may be {@code null}
     * @return the declared charset name if this JVM supports it, otherwise
     *         {@link #FALLBACK_CHARSET}
     */
    private static String charsetOf(String contentType) {
        if (contentType == null) {
            return FALLBACK_CHARSET;
        }
        final String key = "charset=";
        for (String rawParam : contentType.split(";")) {
            String param = rawParam.trim();
            if (!param.regionMatches(true, 0, key, 0, key.length())) {
                continue;
            }
            String name = param.substring(key.length()).trim();
            // The value may be written as a quoted string.
            if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                name = name.substring(1, name.length() - 1);
            }
            try {
                if (java.nio.charset.Charset.isSupported(name)) {
                    return name;
                }
            } catch (IllegalArgumentException ignored) {
                // Syntactically illegal charset name: treat it as undeclared.
            }
        }
        return FALLBACK_CHARSET;
    }
}
|
|
url?sa=p&pref=ig&pval=3&q=http://www.google.ca/ig%3Fhl%3Den&usg=__o-KrRDBI3nbRElKzYEMqfOl3_t0= – Personalize this page
https://www.google.com/accounts/Login?continue=http://www.google.ca/&hl=en – Sign in
http://images.google.ca/imghp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wi – Images
http://groups.google.ca/grphp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wg – Groups
http://news.google.ca/nwshp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wn – News
/maps?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wl – Maps
http://scholar.google.com/schhp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=ws – Scholar
/intl/en/options/ – more »
/advanced_search?hl=en – Advanced Search
/preferences?hl=en – Preferences
/language_tools?hl=en – Language Tools |
|