diff options
author | Kris Kennaway <kris@FreeBSD.org> | 2008-07-26 14:01:07 +0000 |
---|---|---|
committer | Kris Kennaway <kris@FreeBSD.org> | 2008-07-26 14:01:07 +0000 |
commit | 90e209c3d9445cd844a6756570b0ed3695462e39 (patch) | |
tree | 3ad0a68aab6ffb8f5fc3d8afdb0a2717a1ee1e19 | |
parent | Fix build of pango plugin (diff) |
* Cleanup
* Catch up to build ID directory changes
* Add support for ssh_cmd and scp_cmd to allow using HPN-SSH with the
none cipher where possible (for performance)
* Lazy client setup: claim-chroot now reports whether the client still needs
to be set up for this build ID; if so, we initiate the setup and poll until
it is complete. This allows fast clients to begin building before
slow ones have finished setting up.
TODO: a better solution would be to avoid trying to dispatch jobs onto
clients that are in the process of setting up, since they often have low
loads and are picked preferentially by the job scheduler.
Notes:
svn path=/head/; revision=217580
-rwxr-xr-x | Tools/portbuild/scripts/pdispatch | 264 |
1 file changed, 160 insertions, 104 deletions
diff --git a/Tools/portbuild/scripts/pdispatch b/Tools/portbuild/scripts/pdispatch index 004228803391..82a3af9ada01 100755 --- a/Tools/portbuild/scripts/pdispatch +++ b/Tools/portbuild/scripts/pdispatch @@ -1,13 +1,19 @@ #!/bin/sh # -# pdispatch <arch> <branch> <command> <package.tgz> [<args> ...] +# pdispatch <arch> <branch> <buildid> <command> <package.tbz> [<args> ...] # # Choose a random machine from ${buildroot}/ulist and dispatch the # job to it via the ptimeout script. pb=/var/portbuild arch=$1 -shift +branch=$2 +buildid=$3 +command=$4 +shift 4 + +pbab=${pb}/${arch}/${branch} + . ${pb}/${arch}/portbuild.conf . ${pb}/scripts/buildenv @@ -17,139 +23,189 @@ timeout=360000 loglength=1000 hdrlength=6 -branch=$1 -command=$2 -shift 2 - -buildenv ${pb} ${arch} ${branch} +builddir=${pbab}/builds/${buildid} +buildenv ${pb} ${arch} ${branch} ${builddir} # ssh -x doesn't work on some machines unset DISPLAY +# Use HPN-SSH for performance +if [ -z "${ssh_cmd}" ]; then + ssh_cmd=ssh +fi +if [ -z "${scp_cmd}" ]; then + scp_cmd=scp +fi + pkgname=$(basename $1 ${PKGSUFFIX}) -if grep -qxF ${pkgname} ${pb}/${arch}/${branch}/duds; then - echo "skipping ${pkgname}" - exit 1 +if grep -qxF ${pkgname} ${builddir}/duds; then + echo "skipping ${pkgname}" + exit 1 fi if [ -z "${pkgname}" ]; then - echo "null packagename" - exit 1 + echo "null packagename" + exit 1 fi args=${1+"$@"} flags="" clean=1 if [ "x$NOCLEAN" != "x" ]; then - flags="${flags} -noclean" - clean=0 + flags="${flags} -noclean" + clean=0 fi if [ "x$NO_RESTRICTED" != "x" ]; then - flags="${flags} -norestr" + flags="${flags} -norestr" fi if [ "x$NOPLISTCHECK" != "x" ]; then - flags="${flags} -noplistcheck" + flags="${flags} -noplistcheck" fi if [ "x$WANT_DISTFILES" != "x" ]; then - flags="${flags} -distfiles" + flags="${flags} -distfiles" fi if [ "x$FETCH_ORIGINAL" != "x" ]; then - flags="${flags} -fetch-original" + flags="${flags} -fetch-original" fi if [ "x$TRYBROKEN" != "x" ]; then - flags="${flags} -trybroken" + 
flags="${flags} -trybroken" fi while `true`; do - host= - chroot= - while [ -z "${host}" -o -z "${chroot}" ]; do + host= chroot= - host=$(lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/getmachine ${pb} ${arch} ${branch}) - # If ulist is empty, then all build machines are busy, so try again in 15 seconds. - if [ -z "${host}" ]; then - sleep 15 - else - . ${pb}/${arch}/portbuild.conf - test -f ${pb}/${arch}/portbuild.${host} && . ${pb}/${arch}/portbuild.${host} - chroot=$(ssh -a -n ${client_user}@${host} ${sudo_cmd} ${pb}/scripts/claim-chroot ${arch} ${branch} ${pkgname}) - if [ -z "${chroot}" ]; then - echo "Failed to claim chroot on ${host}" - lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/releasemachine ${arch} ${host} - fi - fi - done - - . ${pb}/${arch}/portbuild.conf - test -f ${pb}/${arch}/portbuild.${host} && . ${pb}/${arch}/portbuild.${host} - - rm -f ${pb}/${arch}/${branch}/logs/${pkgname}.log ${pb}/${arch}/${branch}/logs/${pkgname}.log.bz2 - rm -f ${pb}/${arch}/${branch}/errors/${pkgname}.log ${pb}/${arch}/${branch}/errors/${pkgname}.log.bz2 - - echo "dispatching: ssh -a -t -n ${client_user}@${host} ${sudo_cmd} ${command} ${arch} ${branch} ${chroot} ${flags} \"$ED\" \"$PD\" \"$FD\" \"$BD\" \"$RD\" ${args}" - ${pb}/scripts/ptimeout.host $timeout ssh -a -t -n ${client_user}@${host} ${sudo_cmd} ${command} ${arch} ${branch} ${chroot} ${flags} \"$ED\" \"$PD\" \"$FD\" \"$BD\" \"$RD\" ${args} 2>&1 - error=$? - - #if grep -q " failed unexpectedly on " ${pb}/${arch}/${branch}/logs/${pkgname}.pre.log; then - # cat ${pb}/${arch}/${branch}/logs/${pkgname}.pre.log | mail -s "${pkgname} failed uncleanly on ${arch} ${branch}" ${mailto} + while [ -z "${host}" -o -z "${chroot}" ]; do + chroot= + host=$(lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/getmachine ${pb} ${arch} ${branch}) + # If ulist is empty, then all build machines are busy, so try + # again in 15 seconds. + if [ -z "${host}" ]; then + sleep 15 + else + . 
${pb}/${arch}/portbuild.conf + test -f ${pb}/${arch}/portbuild.${host} && . ${pb}/${arch}/portbuild.${host} + chrootdata=$(${ssh_cmd} -a -n ${client_user}@${host} ${sudo_cmd} ${pb}/scripts/claim-chroot ${arch} ${branch} ${buildid} ${pkgname} 2>&1) + if [ -z "${chrootdata}" ]; then + echo "Failed to claim chroot on ${host}" + fi + + case "${chrootdata}" in + */var/portbuild/scripts/claim-chroot*) + # Error executing script, assume system is booting + chrootdata="wait boot" + ;; + esac + + echo "Got ${chrootdata} from ${host}" + + set -- ${chrootdata} + if [ $# -ge 2 ]; then + case $1 in + chroot) + chroot=$2 + ;; + setup) + echo "Setting up ${arch}/${branch} build ID ${buildid} on ${host}" + + # Run in the background so we can potentially + # claim a slot on another machine. In + # practise I think we often end up trying + # again on the same machine though. + + # Make sure to close stdin/stderr in the child + # or make will hang until the child process + # exits + # XXX Revert to >&- once this is confirmed as working + ${pb}/scripts/dosetupnode ${arch} ${branch} ${buildid} ${host} >/tmp/setupnode.$$ 2>&1 & + ;; + error) + echo "Error reported by ${host}: $2" + sleep 60 + ;; + wait) + echo "Waiting for setup to finish" + sleep 60 + ;; + esac + shift 2 + fi + + if [ -z "${chroot}" ]; then + lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/releasemachine ${arch} ${host} + fi + fi + done + + . ${pb}/${arch}/portbuild.conf + test -f ${pb}/${arch}/portbuild.${host} && . 
${pb}/${arch}/portbuild.${host} + + rm -f ${builddir}/logs/${pkgname}.log ${builddir}/logs/${pkgname}.log.bz2 + rm -f ${builddir}/errors/${pkgname}.log ${builddir}/errors/${pkgname}.log.bz2 + + echo "dispatching: ${ssh_cmd} -a -t -n ${client_user}@${host} ${sudo_cmd} ${command} ${arch} ${branch} ${buildid} ${chroot} ${flags} \"$ED\" \"$PD\" \"$FD\" \"$BD\" \"$RD\" ${args}" + ${pb}/scripts/ptimeout.host $timeout ${ssh_cmd} -a -t -n ${client_user}@${host} ${sudo_cmd} ${command} ${arch} ${branch} ${buildid} ${chroot} ${flags} \"$ED\" \"$PD\" \"$FD\" \"$BD\" \"$RD\" ${args} 2>&1 + error=$? + + #if grep -q " failed unexpectedly on " ${builddir}/logs/${pkgname}.pre.log; then + # cat ${builddir}/logs/${pkgname}.pre.log | mail -s "${pkgname} failed uncleanly on ${arch} ${branch}" ${mailto} #else - # rm ${pb}/${arch}/${branch}/logs/${pkgname}.pre.log + # rm ${builddir}/logs/${pkgname}.pre.log #fi - - # Pull in the results of the build from the client - - scp ${client_user}@${host}:${chroot}/tmp/${pkgname}.log ${pb}/${arch}/${branch}/logs/${pkgname}.log - (ssh -a -n ${client_user}@${host} test -f ${chroot}/tmp/work.tbz ) && scp ${client_user}@${host}:${chroot}/tmp/work.tbz ${pb}/${arch}/${branch}/wrkdirs/${pkgname}.tbz - - # XXX Set dirty flag if any of the scp's fail - - mkdir -p ${pb}/${arch}/${branch}/distfiles/.pbtmp/${pkgname} - ssh -a -n ${client_user}@${host} tar -C ${chroot}/tmp/distfiles -cf - . 
| \ - tar --unlink -C ${pb}/${arch}/${branch}/distfiles/.pbtmp/${pkgname} -xvf - - touch ${pb}/${arch}/${branch}/distfiles/.pbtmp/${pkgname}/.done - - if [ "${error}" = 0 ]; then - ssh -a -n ${client_user}@${host} tar -C ${chroot}/tmp -cf - packages | \ - tar --unlink -C ${pb}/${arch}/${branch} -xvf - - test -f ${pb}/${arch}/${branch}/packages/All/${pkgname}${PKGSUFFIX} && \ - touch ${pb}/${arch}/${branch}/packages/All/${pkgname}${PKGSUFFIX} - rm -f ${pb}/${arch}/${branch}/errors/${pkgname}.log && \ - touch ${pb}/${arch}/${branch}/errors/.force - lockf -k ${pb}/${arch}/${branch}/failure.lock ${pb}/scripts/buildsuccess ${arch} ${branch} ${pkgname} - log=${pb}/${arch}/${branch}/logs/$pkgname.log - if grep -q "even though it is marked BROKEN" ${log}; then - echo | mail -s "${pkgname} BROKEN but built on ${arch} ${branch}" ${mailto} - fi - if grep -q "^list of .*file" ${log}; then - buildlogdir=$(realpath ${pb}/${arch}/${branch}/logs/) - baselogdir=$(basename ${buildlogdir}) - (sed -e '/^build started/,$d' $log;echo;echo "For the full build log, see"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;sed -e '1,/^=== Checking filesystem state/d' $log) | mail -s "${pkgname} pkg-plist errors on ${arch} ${branch}" ${mailto} - fi - else - log=${pb}/${arch}/${branch}/errors/${pkgname}.log - scp ${client_user}@${host}:${chroot}/tmp/${pkgname}.log ${log} || (echo ${chroot}@${host}; ssh -a -n ${client_user}@${host} ls -laR ${chroot}/tmp) | mail -s "${pkgname} logfile not found" ${mailto} - if ! 
grep -q "even though it is marked BROKEN" ${log}; then - buildlogdir=$(realpath ${pb}/${arch}/${branch}/logs/) - baselogdir=$(basename ${buildlogdir}) - if [ `wc -l ${log} | awk '{print $1}'` -le `expr ${loglength} + ${hdrlength}` ]; then - (echo "You can also find this build log at"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;cat ${log}) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto} - else - (echo "Excerpt from the build log at"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;sed -e '/^build started/,$d' $log;echo;echo " [... lines trimmed ...]";echo;tail -${loglength} ${log}) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto} - fi + + # Pull in the results of the build from the client + + ${scp_cmd} ${client_user}@${host}:${chroot}/tmp/${pkgname}.log ${builddir}/logs/${pkgname}.log + (${ssh_cmd} -a -n ${client_user}@${host} test -f ${chroot}/tmp/work.tbz ) && ${scp_cmd} ${client_user}@${host}:${chroot}/tmp/work.tbz ${builddir}/wrkdirs/${pkgname}.tbz + + # XXX Set dirty flag if any of the scp's fail + + mkdir -p ${builddir}/distfiles/.pbtmp/${pkgname} + ${ssh_cmd} -a -n ${client_user}@${host} tar -C ${chroot}/tmp/distfiles -cf - . 
| \ + tar --unlink -C ${builddir}/distfiles/.pbtmp/${pkgname} -xvf - + touch ${builddir}/distfiles/.pbtmp/${pkgname}/.done + + if [ "${error}" = 0 ]; then + ${ssh_cmd} -a -n ${client_user}@${host} tar -C ${chroot}/tmp -cf - packages | \ + tar --unlink -C ${builddir} -xvf - + test -f ${builddir}/packages/All/${pkgname}${PKGSUFFIX} && \ + touch ${builddir}/packages/All/${pkgname}${PKGSUFFIX} + rm -f ${builddir}/errors/${pkgname}.log && \ + touch ${builddir}/errors/.force + lockf -k ${pbab}/failure.lock ${pb}/scripts/buildsuccess ${arch} ${branch} ${buildid} ${pkgname} + log=${builddir}/logs/$pkgname.log + if grep -q "even though it is marked BROKEN" ${log}; then + echo | mail -s "${pkgname} BROKEN but built on ${arch} ${branch}" ${mailto} + fi + if grep -q "^list of .*file" ${log}; then + buildlogdir=$(realpath ${builddir}/logs/) + baselogdir=$(basename ${buildlogdir}) + (sed -e '/^build started/,$d' $log;echo;echo "For the full build log, see"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;sed -e '1,/^=== Checking filesystem state/d' $log) | mail -s "${pkgname} pkg-plist errors on ${arch} ${branch}" ${mailto} + fi + else + log=${builddir}/errors/${pkgname}.log + ${scp_cmd} ${client_user}@${host}:${chroot}/tmp/${pkgname}.log ${log} || (echo ${chroot}@${host}; ${ssh_cmd} -a -n ${client_user}@${host} ls -laR ${chroot}/tmp) | mail -s "${pkgname} logfile not found" ${mailto} + if ! 
grep -q "even though it is marked BROKEN" ${log}; then + buildlogdir=$(realpath ${builddir}/logs/) + baselogdir=$(basename ${buildlogdir}) + if [ `wc -l ${log} | awk '{print $1}'` -le `expr ${loglength} + ${hdrlength}` ]; then + (echo "You can also find this build log at"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;cat ${log}) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto} + else + (echo "Excerpt from the build log at"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;sed -e '/^build started/,$d' $log;echo;echo " [... lines trimmed ...]";echo;tail -${loglength} ${log}) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto} + fi + fi + lockf -k ${pbab}/failure.lock ${pb}/scripts/buildfailure ${arch} ${branch} ${buildid} ${pkgname} fi - lockf -k ${pb}/${arch}/${branch}/failure.lock ${pb}/scripts/buildfailure ${arch} ${branch} ${pkgname} - fi - - ssh -a -n ${client_user}@${host} ${sudo_cmd} ${pb}/scripts/clean-chroot ${arch} ${branch} ${chroot} ${clean} - - lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/releasemachine ${arch} ${host} - + + ${ssh_cmd} -a -n ${client_user}@${host} ${sudo_cmd} ${pb}/scripts/clean-chroot ${arch} ${branch} ${buildid} ${chroot} ${clean} + + lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/releasemachine ${arch} ${host} + # XXX Set a dirty variable earlier and check here - if grep -q "^build of .*ended at" ${pb}/${arch}/${branch}/logs/${pkgname}.log; then - exit ${error} - else - echo "Build of ${pkgname} in ${host}:/${chroot} failed uncleanly, rebuilding" - sleep 120 - fi + if grep -q "^build of .*ended at" ${builddir}/logs/${pkgname}.log; then + exit ${error} + else + echo "Build of ${pkgname} in ${host}:/${chroot} failed uncleanly, rebuilding" + sleep 120 + fi done |