import javax.swing.text.html.*;
import javax.swing.text.Element;
import javax.swing.text.ElementIterator;
import java.net.URL;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;

/**
 * Extract all "img" tags from an HTML document.
 *
 * <p>The document is fetched from a URL, parsed with the Swing HTML parser
 * ({@link HTMLEditorKit}), and the {@code src} attribute of every
 * {@code img} element is printed to standard output, one per line.
 */
public class HTMLParser {

    /** URL that is fetched when no command-line argument is supplied. */
    private static final String DEFAULT_URL = "http://java.sun.com";

    /**
     * Fetches an HTML page and prints the {@code src} of each image it contains.
     *
     * @param argv optional; when present, {@code argv[0]} is the URL to fetch,
     *             otherwise {@link #DEFAULT_URL} is used
     * @throws Exception if the URL is malformed, the page cannot be read,
     *                   or the parser fails to load the document
     */
    public static void main( String[] argv ) throws Exception {
        String address = ( argv != null && argv.length > 0 ) ? argv[0] : DEFAULT_URL;
        URL url = new URL( address );

        HTMLEditorKit kit = new HTMLEditorKit();
        HTMLDocument doc = (HTMLDocument) kit.createDefaultDocument();
        // The stream is decoded by our own Reader, so a charset directive found
        // inside the page must not make the parser abort the load.
        doc.putProperty( "IgnoreCharsetDirective", Boolean.TRUE );

        // try-with-resources closes the reader (and the underlying connection
        // stream) even when parsing throws. The charset is given explicitly so
        // decoding does not depend on the platform default.
        try ( Reader htmlReader = new InputStreamReader(
                url.openConnection().getInputStream(), StandardCharsets.UTF_8 ) ) {
            kit.read( htmlReader, doc, 0 );
        }

        printImageSources( doc );

        // Kept from the original program: terminate the JVM explicitly once
        // the output is complete. Placed after the try block so the reader is
        // closed before the exit.
        System.exit( 0 );
    }

    /** Prints the SRC attribute of every "img" element in {@code doc}. */
    private static void printImageSources( HTMLDocument doc ) {
        // Get an iterator for all HTML tags.
        ElementIterator it = new ElementIterator( doc );
        Element elem;
        while ( ( elem = it.next() ) != null ) {
            if ( !"img".equals( elem.getName() ) ) {
                continue;
            }
            // Held as Object rather than cast to String, so an unexpected
            // attribute value type cannot raise ClassCastException.
            Object src = elem.getAttributes().getAttribute( HTML.Attribute.SRC );
            if ( src != null ) {
                System.out.println( src );
            }
        }
    }
}