Using the Swing Text HTML Parser
R. Kevin Cole, May 05, 2001

You can use the Swing text package to parse HTML documents. Here is an example that parses a web page and prints the SRC attribute of every IMG tag to standard output.

Source code: HTMLParser.java
import javax.swing.text.html.*;
import javax.swing.text.Element;
import javax.swing.text.ElementIterator;
import java.net.URL;
import java.io.InputStreamReader;
import java.io.Reader;

/**
 * Extracts every "img" element from an HTML document and prints the value of
 * its SRC attribute to standard output, one per line.
 *
 * <p>The document is fetched from a hard-coded URL and parsed with the Swing
 * {@link HTMLEditorKit}.
 */
public class HTMLParser
{
    /**
     * Downloads the page, parses it into an {@link HTMLDocument}, walks all
     * elements and prints the SRC attribute of each "img" element.
     *
     * @param argv command-line arguments (unused)
     * @throws Exception if the URL cannot be opened or the document cannot be read
     */
    public static void main( String[] argv ) throws Exception
    {
        URL url = new URL( "http://java.sun.com" );
        HTMLEditorKit kit = new HTMLEditorKit();
        HTMLDocument doc = (HTMLDocument) kit.createDefaultDocument();
        // Keep parsing with the reader's encoding even if the page declares a
        // charset in a META tag (otherwise the parser may abort the read).
        doc.putProperty( "IgnoreCharsetDirective", Boolean.TRUE );

        // NOTE: no charset is given, so the platform default encoding is used.
        Reader htmlReader = new InputStreamReader( url.openConnection().getInputStream() );
        try
        {
            kit.read( htmlReader, doc, 0 );
        }
        finally
        {
            // Always release the network stream, even if parsing fails.
            htmlReader.close();
        }

        // Get an iterator for all HTML tags.
        ElementIterator it = new ElementIterator( doc );
        Element elem;

        // The parentheses are required: without them "!=" binds tighter than
        // "=", which tries to assign a boolean to elem and does not compile.
        while( ( elem = it.next() ) != null )
        {
            if( "img".equals( elem.getName() ) )
            {
                // Print the attribute value as-is; no cast is needed and a
                // non-String value cannot cause a ClassCastException.
                Object src = elem.getAttributes().getAttribute( HTML.Attribute.SRC );
                if( src != null )
                {
                    System.out.println( src );
                }
            }
        }

        // Explicit exit kept from the original example; presumably it guards
        // against lingering non-daemon threads keeping the JVM alive.
        System.exit( 0 );
    }
}