* Cleanup

* Catch up to build ID directory changes

* Add support for ssh_cmd and scp_cmd to allow using HPN-SSH with the none cipher where possible (for performance)

* Lazy client setup; claim-chroot will report if the client needs to be set up with this buildid, and we initiate the setup and poll until it is complete. This allows fast clients to begin building before slow ones have finished setting up.

TODO: a better solution would be to avoid trying to dispatch jobs onto clients that are in the process of setting up, since they often have low loads and are picked preferentially by the job scheduler.
author: Kris Kennaway <kris@FreeBSD.org> 2008-07-26 14:01:07 +0000
committer: Kris Kennaway <kris@FreeBSD.org> 2008-07-26 14:01:07 +0000
commit: 90e209c3d9445cd844a6756570b0ed3695462e39 (patch)
tree: 3ad0a68aab6ffb8f5fc3d8afdb0a2717a1ee1e19
parent: Fix build of pango plugin (diff)
1 files changed, 160 insertions, 104 deletions
diff --git a/Tools/portbuild/scripts/pdispatch b/Tools/portbuild/scripts/pdispatch
index 004228803391..82a3af9ada01 100755
--- a/Tools/portbuild/scripts/pdispatch
+++ b/Tools/portbuild/scripts/pdispatch
@@ -1,13 +1,19 @@
 #!/bin/sh
 #
-# pdispatch <arch> <branch> <command> <package.tgz> [<args> ...]
+# pdispatch <arch> <branch> <buildid> <command> <package.tbz> [<args> ...]
 #
 # Choose a random machine from ${buildroot}/ulist and dispatch the
 # job to it via the ptimeout script.
 
 pb=/var/portbuild
 arch=$1
-shift
+branch=$2
+buildid=$3
+command=$4
+shift 4
+
+pbab=${pb}/${arch}/${branch}
+
 . ${pb}/${arch}/portbuild.conf
 . ${pb}/scripts/buildenv
 
@@ -17,139 +23,189 @@ timeout=360000
 loglength=1000
 hdrlength=6
 
-branch=$1
-command=$2
-shift 2
-
-buildenv ${pb} ${arch} ${branch}
+builddir=${pbab}/builds/${buildid}
+buildenv ${pb} ${arch} ${branch} ${builddir}
 
 # ssh -x doesn't work on some machines
 unset DISPLAY
 
+# Use HPN-SSH for performance
+if [ -z "${ssh_cmd}" ]; then
+    ssh_cmd=ssh
+fi
+if [ -z "${scp_cmd}" ]; then
+    scp_cmd=scp
+fi
+
 pkgname=$(basename $1 ${PKGSUFFIX})
 
-if grep -qxF ${pkgname} ${pb}/${arch}/${branch}/duds; then
-  echo "skipping ${pkgname}"
-  exit 1
+if grep -qxF ${pkgname} ${builddir}/duds; then
+    echo "skipping ${pkgname}"
+    exit 1
 fi
 
 if [ -z "${pkgname}" ]; then
-  echo "null packagename"
-  exit 1
+    echo "null packagename"
+    exit 1
 fi
 
 args=${1+"$@"}
 flags=""
 clean=1
 if [ "x$NOCLEAN" != "x" ]; then
-  flags="${flags} -noclean"
-  clean=0
+    flags="${flags} -noclean"
+    clean=0
 fi
 if [ "x$NO_RESTRICTED" != "x" ]; then
-  flags="${flags} -norestr"
+    flags="${flags} -norestr"
 fi
 if [ "x$NOPLISTCHECK" != "x" ]; then
-  flags="${flags} -noplistcheck"
+    flags="${flags} -noplistcheck"
 fi
 if [ "x$WANT_DISTFILES" != "x" ]; then
-  flags="${flags} -distfiles"
+    flags="${flags} -distfiles"
 fi
 if [ "x$FETCH_ORIGINAL" != "x" ]; then
-  flags="${flags} -fetch-original"
+    flags="${flags} -fetch-original"
 fi
 if [ "x$TRYBROKEN" != "x" ]; then
-  flags="${flags} -trybroken"
+    flags="${flags} -trybroken"
 fi
 
 while `true`; do
-  host=
-  chroot=
-  while [ -z "${host}" -o -z "${chroot}" ]; do
+    host=
     chroot=
-    host=$(lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/getmachine ${pb} ${arch} ${branch})
-    # If ulist is empty, then all build machines are busy, so try again in 15 seconds.
-    if [ -z "${host}" ]; then
-      sleep 15
-    else
-      . ${pb}/${arch}/portbuild.conf
-      test -f ${pb}/${arch}/portbuild.${host} && . ${pb}/${arch}/portbuild.${host}
-      chroot=$(ssh -a -n ${client_user}@${host} ${sudo_cmd} ${pb}/scripts/claim-chroot ${arch} ${branch} ${pkgname})
-      if [ -z "${chroot}" ]; then
-        echo "Failed to claim chroot on ${host}"
-        lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/releasemachine ${arch} ${host}
-      fi
-    fi
-  done
-  
-  . ${pb}/${arch}/portbuild.conf
-  test -f ${pb}/${arch}/portbuild.${host} && . ${pb}/${arch}/portbuild.${host}
-  
-  rm -f ${pb}/${arch}/${branch}/logs/${pkgname}.log ${pb}/${arch}/${branch}/logs/${pkgname}.log.bz2
-  rm -f ${pb}/${arch}/${branch}/errors/${pkgname}.log ${pb}/${arch}/${branch}/errors/${pkgname}.log.bz2
-
-  echo "dispatching: ssh -a -t -n ${client_user}@${host} ${sudo_cmd} ${command} ${arch} ${branch} ${chroot} ${flags} \"$ED\" \"$PD\" \"$FD\" \"$BD\" \"$RD\" ${args}"
-  ${pb}/scripts/ptimeout.host $timeout ssh -a -t -n ${client_user}@${host} ${sudo_cmd} ${command} ${arch} ${branch} ${chroot} ${flags} \"$ED\" \"$PD\" \"$FD\" \"$BD\" \"$RD\" ${args} 2>&1
-  error=$?
-
-  #if grep -q " failed unexpectedly on " ${pb}/${arch}/${branch}/logs/${pkgname}.pre.log; then
-  #  cat ${pb}/${arch}/${branch}/logs/${pkgname}.pre.log | mail -s "${pkgname} failed uncleanly on ${arch} ${branch}" ${mailto}
+    while [ -z "${host}" -o -z "${chroot}" ]; do
+	chroot=
+	host=$(lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/getmachine ${pb} ${arch} ${branch})
+        # If ulist is empty, then all build machines are busy, so try
+        # again in 15 seconds.
+	if [ -z "${host}" ]; then
+	    sleep 15
+	else
+	    . ${pb}/${arch}/portbuild.conf
+	    test -f ${pb}/${arch}/portbuild.${host} && . ${pb}/${arch}/portbuild.${host}
+	    chrootdata=$(${ssh_cmd} -a -n ${client_user}@${host} ${sudo_cmd} ${pb}/scripts/claim-chroot ${arch} ${branch} ${buildid} ${pkgname} 2>&1)
+	    if [ -z "${chrootdata}" ]; then
+		echo "Failed to claim chroot on ${host}"
+	    fi
+
+	    case "${chrootdata}" in
+		*/var/portbuild/scripts/claim-chroot*)
+		    # Error executing script, assume system is booting
+		    chrootdata="wait boot"
+		    ;;
+	    esac
+
+	    echo "Got ${chrootdata} from ${host}"
+
+	    set -- ${chrootdata}
+	    if [ $# -ge 2 ]; then
+		case $1 in
+		    chroot)
+			chroot=$2
+			;;
+		    setup)
+			echo "Setting up ${arch}/${branch} build ID ${buildid} on ${host}"
+
+			# Run in the background so we can potentially
+			# claim a slot on another machine.  In
+			# practise I think we often end up trying
+			# again on the same machine though.
+
+			# Make sure to close stdin/stderr in the child
+			# or make will hang until the child process
+			# exits
+			# XXX Revert to >&- once this is confirmed as working
+			${pb}/scripts/dosetupnode ${arch} ${branch} ${buildid} ${host} >/tmp/setupnode.$$ 2>&1 &
+			;;
+		    error)
+			echo "Error reported by ${host}: $2"
+			sleep 60
+			;;
+		    wait)
+			echo "Waiting for setup to finish"
+			sleep 60
+			;;
+		esac
+		shift 2
+	    fi
+
+	    if [ -z "${chroot}" ]; then
+		lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/releasemachine ${arch} ${host}
+	    fi
+	fi
+    done
+    
+    . ${pb}/${arch}/portbuild.conf
+    test -f ${pb}/${arch}/portbuild.${host} && . ${pb}/${arch}/portbuild.${host}
+    
+    rm -f ${builddir}/logs/${pkgname}.log ${builddir}/logs/${pkgname}.log.bz2
+    rm -f ${builddir}/errors/${pkgname}.log ${builddir}/errors/${pkgname}.log.bz2
+    
+    echo "dispatching: ${ssh_cmd} -a -t -n ${client_user}@${host} ${sudo_cmd} ${command} ${arch} ${branch} ${buildid} ${chroot} ${flags} \"$ED\" \"$PD\" \"$FD\" \"$BD\" \"$RD\" ${args}"
+    ${pb}/scripts/ptimeout.host $timeout ${ssh_cmd} -a -t -n ${client_user}@${host} ${sudo_cmd} ${command} ${arch} ${branch} ${buildid} ${chroot} ${flags} \"$ED\" \"$PD\" \"$FD\" \"$BD\" \"$RD\" ${args} 2>&1
+    error=$?
+    
+  #if grep -q " failed unexpectedly on " ${builddir}/logs/${pkgname}.pre.log; then
+  #  cat ${builddir}/logs/${pkgname}.pre.log | mail -s "${pkgname} failed uncleanly on ${arch} ${branch}" ${mailto}
   #else
-  #  rm ${pb}/${arch}/${branch}/logs/${pkgname}.pre.log
+  #  rm ${builddir}/logs/${pkgname}.pre.log
   #fi
-  
-  # Pull in the results of the build from the client
-  
-  scp ${client_user}@${host}:${chroot}/tmp/${pkgname}.log ${pb}/${arch}/${branch}/logs/${pkgname}.log
-  (ssh -a -n ${client_user}@${host} test -f ${chroot}/tmp/work.tbz ) && scp ${client_user}@${host}:${chroot}/tmp/work.tbz ${pb}/${arch}/${branch}/wrkdirs/${pkgname}.tbz
-  
-  # XXX Set dirty flag if any of the scp's fail
-
-  mkdir -p ${pb}/${arch}/${branch}/distfiles/.pbtmp/${pkgname}
-  ssh -a -n ${client_user}@${host} tar -C ${chroot}/tmp/distfiles -cf - . | \
-    tar --unlink -C ${pb}/${arch}/${branch}/distfiles/.pbtmp/${pkgname} -xvf -
-  touch ${pb}/${arch}/${branch}/distfiles/.pbtmp/${pkgname}/.done
-  
-  if [ "${error}" = 0 ]; then
-    ssh -a -n ${client_user}@${host} tar -C ${chroot}/tmp -cf - packages | \
-      tar --unlink -C ${pb}/${arch}/${branch} -xvf -
-    test -f ${pb}/${arch}/${branch}/packages/All/${pkgname}${PKGSUFFIX} && \
-      touch ${pb}/${arch}/${branch}/packages/All/${pkgname}${PKGSUFFIX}
-    rm -f ${pb}/${arch}/${branch}/errors/${pkgname}.log && \
-      touch ${pb}/${arch}/${branch}/errors/.force
-    lockf -k ${pb}/${arch}/${branch}/failure.lock ${pb}/scripts/buildsuccess ${arch} ${branch} ${pkgname}
-    log=${pb}/${arch}/${branch}/logs/$pkgname.log
-    if grep -q "even though it is marked BROKEN" ${log}; then
-      echo | mail -s "${pkgname} BROKEN but built on ${arch} ${branch}" ${mailto}
-    fi
-    if grep -q "^list of .*file" ${log}; then
-      buildlogdir=$(realpath ${pb}/${arch}/${branch}/logs/)
-      baselogdir=$(basename ${buildlogdir})
-      (sed -e '/^build started/,$d' $log;echo;echo "For the full build log, see"; echo; echo "  http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;sed -e '1,/^=== Checking filesystem state/d' $log) | mail -s "${pkgname} pkg-plist errors on ${arch} ${branch}" ${mailto}
-    fi
-  else
-    log=${pb}/${arch}/${branch}/errors/${pkgname}.log
-    scp ${client_user}@${host}:${chroot}/tmp/${pkgname}.log ${log} || (echo ${chroot}@${host}; ssh -a -n ${client_user}@${host} ls -laR ${chroot}/tmp) | mail -s "${pkgname} logfile not found" ${mailto}
-    if ! grep -q "even though it is marked BROKEN" ${log}; then
-      buildlogdir=$(realpath ${pb}/${arch}/${branch}/logs/)
-      baselogdir=$(basename ${buildlogdir})
-      if [ `wc -l ${log} | awk '{print $1}'` -le `expr ${loglength} + ${hdrlength}` ]; then
-        (echo "You can also find this build log at"; echo; echo "  http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;cat ${log}) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto}
-      else
-        (echo "Excerpt from the build log at"; echo; echo "  http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;sed -e '/^build started/,$d' $log;echo;echo "  [... lines trimmed ...]";echo;tail -${loglength} ${log}) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto}
-      fi
+    
+    # Pull in the results of the build from the client
+    
+    ${scp_cmd} ${client_user}@${host}:${chroot}/tmp/${pkgname}.log ${builddir}/logs/${pkgname}.log
+    (${ssh_cmd} -a -n ${client_user}@${host} test -f ${chroot}/tmp/work.tbz ) && ${scp_cmd} ${client_user}@${host}:${chroot}/tmp/work.tbz ${builddir}/wrkdirs/${pkgname}.tbz
+    
+    # XXX Set dirty flag if any of the scp's fail
+    
+    mkdir -p ${builddir}/distfiles/.pbtmp/${pkgname}
+    ${ssh_cmd} -a -n ${client_user}@${host} tar -C ${chroot}/tmp/distfiles -cf - . | \
+	tar --unlink -C ${builddir}/distfiles/.pbtmp/${pkgname} -xvf -
+    touch ${builddir}/distfiles/.pbtmp/${pkgname}/.done
+    
+    if [ "${error}" = 0 ]; then
+	${ssh_cmd} -a -n ${client_user}@${host} tar -C ${chroot}/tmp -cf - packages | \
+	    tar --unlink -C ${builddir} -xvf -
+	test -f ${builddir}/packages/All/${pkgname}${PKGSUFFIX} && \
+	    touch ${builddir}/packages/All/${pkgname}${PKGSUFFIX}
+	rm -f ${builddir}/errors/${pkgname}.log && \
+	    touch ${builddir}/errors/.force
+	lockf -k ${pbab}/failure.lock ${pb}/scripts/buildsuccess ${arch} ${branch} ${buildid} ${pkgname}
+	log=${builddir}/logs/$pkgname.log
+	if grep -q "even though it is marked BROKEN" ${log}; then
+	    echo | mail -s "${pkgname} BROKEN but built on ${arch} ${branch}" ${mailto}
+	fi
+	if grep -q "^list of .*file" ${log}; then
+	    buildlogdir=$(realpath ${builddir}/logs/)
+	    baselogdir=$(basename ${buildlogdir})
+	    (sed -e '/^build started/,$d' $log;echo;echo "For the full build log, see"; echo; echo "  http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;sed -e '1,/^=== Checking filesystem state/d' $log) | mail -s "${pkgname} pkg-plist errors on ${arch} ${branch}" ${mailto}
+	fi
+    else
+	log=${builddir}/errors/${pkgname}.log
+	${scp_cmd} ${client_user}@${host}:${chroot}/tmp/${pkgname}.log ${log} || (echo ${chroot}@${host}; ${ssh_cmd} -a -n ${client_user}@${host} ls -laR ${chroot}/tmp) | mail -s "${pkgname} logfile not found" ${mailto}
+	if ! grep -q "even though it is marked BROKEN" ${log}; then
+	    buildlogdir=$(realpath ${builddir}/logs/)
+	    baselogdir=$(basename ${buildlogdir})
+	    if [ `wc -l ${log} | awk '{print $1}'` -le `expr ${loglength} + ${hdrlength}` ]; then
+		(echo "You can also find this build log at"; echo; echo "  http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;cat ${log}) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto}
+	    else
+		(echo "Excerpt from the build log at"; echo; echo "  http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;sed -e '/^build started/,$d' $log;echo;echo "  [... lines trimmed ...]";echo;tail -${loglength} ${log}) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto}
+	    fi
+	fi
+	lockf -k ${pbab}/failure.lock ${pb}/scripts/buildfailure ${arch} ${branch} ${buildid} ${pkgname}
     fi
-    lockf -k ${pb}/${arch}/${branch}/failure.lock ${pb}/scripts/buildfailure ${arch} ${branch} ${pkgname}
-  fi
-  
-  ssh -a -n ${client_user}@${host} ${sudo_cmd} ${pb}/scripts/clean-chroot ${arch} ${branch} ${chroot} ${clean}
-  
-  lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/releasemachine ${arch} ${host}
-
+    
+    ${ssh_cmd} -a -n ${client_user}@${host} ${sudo_cmd} ${pb}/scripts/clean-chroot ${arch} ${branch} ${buildid} ${chroot} ${clean}
+    
+    lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/releasemachine ${arch} ${host}
+    
   # XXX Set a dirty variable earlier and check here
-  if grep -q "^build of .*ended at" ${pb}/${arch}/${branch}/logs/${pkgname}.log; then
-    exit ${error}
-  else
-    echo "Build of ${pkgname} in ${host}:/${chroot} failed uncleanly, rebuilding"
-    sleep 120
-  fi
+    if grep -q "^build of .*ended at" ${builddir}/logs/${pkgname}.log; then
+	exit ${error}
+    else
+	echo "Build of ${pkgname} in ${host}:/${chroot} failed uncleanly, rebuilding"
+	sleep 120
+    fi
 done
author	Kris Kennaway <kris@FreeBSD.org>	2008-07-26 14:01:07 +0000
committer	Kris Kennaway <kris@FreeBSD.org>	2008-07-26 14:01:07 +0000
commit	90e209c3d9445cd844a6756570b0ed3695462e39 (patch)
tree	3ad0a68aab6ffb8f5fc3d8afdb0a2717a1ee1e19
parent	Fix build of pango plugin (diff)