From 302ddeb73e4485411c6ccb4dbc8d317879222dba Mon Sep 17 00:00:00 2001
From: "David W. Chapman Jr." <dwcjr@FreeBSD.org>
Date: Sat, 23 Jun 2001 16:09:53 +0000
Subject: Add crawl, a worm that searches for jpegs

PR:		28304
Submitted by:	Pete Fritchman <petef@databits.net>
---
 www/crawl/Makefile                 | 25 +++++++++++++++++++++++++
 www/crawl/distinfo                 |  1 +
 www/crawl/files/patch-configure.in | 17 +++++++++++++++++
 www/crawl/pkg-comment              |  1 +
 www/crawl/pkg-descr                | 23 +++++++++++++++++++++++
 www/crawl/pkg-plist                |  1 +
 6 files changed, 68 insertions(+)
 create mode 100644 www/crawl/Makefile
 create mode 100644 www/crawl/distinfo
 create mode 100644 www/crawl/files/patch-configure.in
 create mode 100644 www/crawl/pkg-comment
 create mode 100644 www/crawl/pkg-descr
 create mode 100644 www/crawl/pkg-plist

(limited to 'www/crawl')

diff --git a/www/crawl/Makefile b/www/crawl/Makefile
new file mode 100644
index 000000000000..e25ae90ec559
--- /dev/null
+++ b/www/crawl/Makefile
@@ -0,0 +1,25 @@
+# New ports collection makefile for:	crawl
+# Date created:				20 June 2001
+# Whom:					Pete Fritchman <petef@databits.net>
+#
+# $FreeBSD$
+#
+
+PORTNAME=	crawl
+PORTVERSION=	0.1
+CATEGORIES=	www
+MASTER_SITES=	http://www.monkey.org/~provos/
+
+MAINTAINER=	petef@databits.net
+# Build-time dependency: the static libevent archive from devel/libevent.
+BUILD_DEPENDS=	${LOCALBASE}/lib/libevent.a:${PORTSDIR}/devel/libevent
+# Work in ${WRKDIR}/crawl; presumably the distfile unpacks without a version suffix.
+WRKSRC=		${WRKDIR}/${PORTNAME}
+# files/patch-configure.in edits configure.in, so configure presumably must be regenerated.
+USE_AUTOCONF=	yes
+GNU_CONFIGURE=	yes
+CONFIGURE_ARGS=	--with-libevent=${LOCALBASE}
+# Section 1 manual page; it is not listed in pkg-plist (presumably bsd.port.mk handles it).
+MAN1=	crawl.1
+
+.include <bsd.port.mk>
diff --git a/www/crawl/distinfo b/www/crawl/distinfo
new file mode 100644
index 000000000000..80abbc95dafe
--- /dev/null
+++ b/www/crawl/distinfo
@@ -0,0 +1 @@
+MD5 (crawl-0.1.tar.gz) = 93df9d0e6534bc4fc462950c023ec2e7
diff --git a/www/crawl/files/patch-configure.in b/www/crawl/files/patch-configure.in
new file mode 100644
index 000000000000..9de236fb45a9
--- /dev/null
+++ b/www/crawl/files/patch-configure.in
@@ -0,0 +1,17 @@
+--- configure.in.orig	Wed Jun 20 14:41:44 2001
++++ configure.in	Wed Jun 20 17:30:07 2001
+@@ -38,11 +38,11 @@
+      ;;
+   *)
+      AC_MSG_RESULT($withval)
+-     if test -f $withval/event.h -a -f $withval/libevent.a; then
++     if test -f $withval/include/event.h -a -f $withval/lib/libevent.a; then
+         owd=`pwd`
+         if cd $withval; then withval=`pwd`; cd $owd; fi
+-        EVENTINC="-I$withval"
+-        EVENTLIB="-L$withval -levent"
++        EVENTINC="-I$withval/include"
++        EVENTLIB="-L$withval/lib -levent"
+      else
+         AC_ERROR(event.h or libevent.a not found in $withval)
+      fi
diff --git a/www/crawl/pkg-comment b/www/crawl/pkg-comment
new file mode 100644
index 000000000000..16dd9e5120d0
--- /dev/null
+++ b/www/crawl/pkg-comment
@@ -0,0 +1 @@
+A small, efficient web crawler with advanced features
diff --git a/www/crawl/pkg-descr b/www/crawl/pkg-descr
new file mode 100644
index 000000000000..96361c6087a2
--- /dev/null
+++ b/www/crawl/pkg-descr
@@ -0,0 +1,23 @@
+The crawl utility starts a depth-first traversal of the web at the
+specified URLs. It stores all JPEG images that match the configured
+constraints.  Crawl is fairly fast and allows for graceful termination.
+After terminating crawl, it is possible to restart it at exactly
+the same spot where it was terminated. Crawl keeps a persistent
+database that allows multiple crawls without revisiting sites.
+
+The main reason for writing crawl was the lack of simple open source
+web crawlers. Crawl is only a few thousand lines of code and fairly
+easy to debug and customize.
+
+Some of the main features:
+ - Saves encountered JPEG images
+ - Image selection based on regular expressions and size constraints
+ - Resume previous crawl after graceful termination
+ - Persistent database of visited URLs
+ - Very small and efficient code
+ - Supports robots.txt
+
+WWW: http://www.monkey.org/~provos/crawl/
+
+- Pete
+petef@databits.net
diff --git a/www/crawl/pkg-plist b/www/crawl/pkg-plist
new file mode 100644
index 000000000000..1cdd09ea5311
--- /dev/null
+++ b/www/crawl/pkg-plist
@@ -0,0 +1 @@
+bin/crawl
-- 
cgit v1.2.3