DOM-based content extraction of HTML documents
From Tetherless World Wiki
\begin{bibtex}
@inproceedings{DBLP:conf/www/GuptaKNG03,
  author    = {Gupta, Suhit and Kaiser, Gail E. and Neistadt, David and Grimm, Peter},
  title     = {{DOM}-based content extraction of {HTML} documents},
  booktitle = {{WWW}},
  pages     = {207--214},
  year      = {2003},
  doi       = {10.1145/775152.775182},
  ee        = {http://doi.acm.org/10.1145/775152.775182},
  url       = {http://www.informatik.uni-trier.de/~ley/db/conf/www/www2003.html#GuptaKNG03},
}
\end{bibtex}
