diff options
| -rwxr-xr-x | concurrent_dl.rc | 73 | ||||
| -rwxr-xr-x | concurrent_dl.sh | 96 | 
2 files changed, 169 insertions, 0 deletions
diff --git a/concurrent_dl.rc b/concurrent_dl.rc
new file mode 100755
index 0000000..d08a56a
--- /dev/null
+++ b/concurrent_dl.rc
@@ -0,0 +1,73 @@
+# Concurrent URL downloader for the rc shell.
+# Source this file, then feed URLs to concurrent_dl on standard input.
+
+# Print a "." followed by digits derived from two random bytes.
+# NOTE(review): relies on an external base16 tool emitting lowercase hex;
+# not called anywhere in this file.
+fn rand {
+	dd if=/dev/urandom bs=1 count=2 >[2]/dev/null | base16 | tr 'a-f' '0-9' | sed -e 's|^|.|'
+}
+
+# dl out url headers
+# Announce url, then fetch it into out, sending the headers in file headers.
+fn dl {
+	echo $2
+	curl --max-time 10 -sLo $1 $2 -H @$3
+}
+
+#fn index_out {
+#	sed -e 's|.*&||' -e 's|^|index_d|'
+#}
+
+# Reduce a URL read from standard input to the text after its last "/".
+fn strip_path {
+	sed -e 's|.*/||'
+}
+
+# Number of URLs handled per batch.
+chunk = 10
+
+# concurrent_dl [filter [headers]]
+# Read whitespace-separated URLs from standard input and download them
+# $chunk at a time, waiting for each batch before starting the next.
+# $1 is a function for determining output file names from urls
+# $2 is a file with HTTP headers
+# URLs whose output file already exists are skipped.
+fn concurrent_dl {
+	in=`{cat}
+	filter = $1
+
+	# Fall back to strip_path when no filter was given.
+	if (~ $"filter '') {
+		filter = strip_path
+	}
+
+	# rc lists are indexed from 1; each batch is $in($i) through $in($end).
+	i = 1
+	limit = $#in
+	while (test $i -le $limit) {
+		end=`{echo $i + $chunk - 1 | bc}
+		list = `{for (j in `{seq $i $end}) {
+				echo $in($j)
+			}
+		}
+
+		# Start from an empty list so only real pids are waited on.
+		pids = ()
+		for (url in $list) {
+			o = `{echo $url | $filter}
+
+			if (! test -f $o) {
+				dl $o $url $2 &
+				pids = ($apid $pids)
+			}
+		}
+
+		for (pid in $pids) {
+			echo waiting on $pid
+			wait $pid
+		}
+
+		i = `{echo $end + 1 | bc}
+	}
+}
diff --git a/concurrent_dl.sh b/concurrent_dl.sh
new file mode 100755
index 0000000..f334e8e
--- /dev/null
+++ b/concurrent_dl.sh
@@ -0,0 +1,96 @@
+#!/bin/sh
+#
+# Concurrent URL downloader. Source this file, then feed URLs to
+# concurrent_dl on standard input.
+
+# Number of URLs handled per batch.
+chunk=10
+
+# dl_safe out url headers
+# Fetch url into out, sending the request headers stored in file headers.
+# Creates out's directory when needed and makes up to 10 attempts.
+# Returns the exit status of the last curl attempt.
+dl_safe()
+{
+	echo "$1 <- $2"
+	case $1 in
+	?*/*)
+		mkdir -p "${1%/*}"
+		;;
+	esac
+
+	ret=1
+	tries=0
+	limit=10
+	while [ "$ret" -ne 0 ] && [ "$tries" -lt "$limit" ]
+	do
+		curl --max-time 10 -sLo "$1" "$2" -H "@$3"
+		ret=$?
+		tries=$((tries+1))
+	done
+	# Report failure only when the final attempt failed as well.
+	if [ "$ret" -ne 0 ]
+	then
+		echo "Tried $limit times, bad URL." >&2
+	fi
+	return "$ret"
+}
+
+# Reduce a URL read from standard input to the text after its last "/".
+strip_path()
+{
+	sed -e 's|.*/||'
+}
+
+# wait_for pid...
+# Wait for each listed background job, announcing it first.
+wait_for()
+{
+	for pid
+	do
+		echo "waiting on $pid"
+		wait "$pid"
+	done
+}
+
+# concurrent_dl filter headers
+# Read URLs (separated by newlines or spaces) from standard input and
+# download them $chunk at a time, waiting for each batch to finish.
+# filter is a command mapping a URL on stdin to an output file name;
+# an empty filter means strip_path. headers is a file of HTTP headers
+# and is required. URLs whose output file already exists are skipped.
+concurrent_dl()
+{
+	filter=${1:-strip_path}
+	headers=$2
+	if [ -z "$headers" ]
+	then
+		echo No headers, no good. >&2
+		return 1
+	fi
+
+	# Blank entries are dropped so they are never treated as URLs.
+	tr ' ' '\n' | grep -v '^$' | (
+	pids=""
+	count=0
+	while IFS= read -r url
+	do
+		# $filter is left unquoted so it may carry its own arguments.
+		o=$(printf '%s\n' "$url" | $filter)
+		if [ -n "$o" ] && ! [ -f "$o" ]
+		then
+			dl_safe "$o" "$url" "$headers" &
+			pids="$pids $!"
+		fi
+		count=$((count+1))
+		if [ "$count" -ge "${chunk:-10}" ]
+		then
+			wait_for $pids
+			pids=""
+			count=0
+		fi
+	done
+	# Wait for the final, possibly partial, batch.
+	wait_for $pids
+	)
+}
