Monday, November 22, 2010

Download Fileserve links from shell

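The script assumes that
  • ImageMagick
  • Curl and Wget
  • Common Unix utilities (e.g. sed, awk, grep, cut, bc, etc.)
... are installed and available in your PATH. The captcha is shown using the 'display' component of ImageMagick. Note that the script as posted below hands the captcha image to the helper script prompt_captcha.py (the direct 'display' call is commented out), so that helper must be executable and in your PATH as well. You need X forwarding if you are running the script over ssh.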


#!/bin/bash
# Disabled: external helper library; the script does not source it.
#source /home/babil/Desktop/recaptcha/plowshare-0.9.4/src/lib.sh

# --- Target and captcha settings -------------------------------------------
# Example download page; overwritten by the first command-line argument.
PUB_URL="http://www.fileserve.com/file/zXvrJS2"
# Public reCAPTCHA key used to request a captcha challenge.
FILESERVE_RECAPTCHA_PUBKEY="6LdSvrkSAAAAAOIwNj-IY-Q-p90hQrLinRIpZBPi"
# Image viewer command (only referenced by the commented-out captcha prompt).
IMAGE_VIEWER='display'

# --- Scratch files (unique names from mktemp) ------------------------------
HTML="$(mktemp)"           # last HTML page received from the site
COOKIE="$(mktemp)"         # curl cookie jar
CAPTCHA_IMAGE="$(mktemp)"  # downloaded captcha picture

# --- HTTP request settings --------------------------------------------------
# User-Agent string sent with every curl request.
AGENT="Mozilla/5.0 (X11; U; Linux x86_64; en-AU; rv:1.9.2.12) Gecko/20101027 Ubuntu/10.10 (maverick) Firefox/3.6.12"
# Extra header option strings; defined here but not used by the code below.
XML_HTTP_REQUEST='-H "X-Requested-With: XMLHTTPRequested"'
CUSTOM_HEADERS='-H "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" -H "Accept-Language: en-us,en;q=0.5" -H "Accept-Encoding: gzip,deflate" -H "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7" -H "Keep-Alive: 115" -H "Connection: keep-alive"'

# --- ANSI colour escapes, interpreted by `echo -e` --------------------------
NC='\e[0m' # No Color
RED='\e[0;31m'
CYAN='\e[0;36m'


# Replace every match of a pattern in the text read from stdin.
# Arguments: $1 - sed (basic) regular expression to search for
#            $2 - replacement text
# Outputs:   the transformed text on stdout
# In both arguments the characters '/', '\' and '&' are escaped first, so
# they are taken literally and cannot break the generated `s/…/…/g` command.
function regex_replace() {
  # sed script that prefixes '/', '\' and '&' with a backslash.
  local escape_re='s/\(\/\|\\\|&\)/\\&/g'
  # Locals: the helper must not overwrite caller variables named RE/FROM/TO.
  local from to
  from=$(sed -e "$escape_re" <<< "$1")
  to=$(sed -e "$escape_re" <<< "$2")
  sed -e "s/$from/$to/g"
}

# Delete leading and trailing whitespace (spaces, tabs, \r, ...) from $1.
# Arguments: $1 - the string to trim
# Outputs:   the trimmed string on stdout
function strip_whiteSpace() {
  # printf instead of echo: echo would treat values such as "-n" or "-e"
  # as options and print nothing.
  printf '%s\n' "$1" | sed 's/^[[:space:]]*//; s/[[:space:]]*$//'
}

# Filter: copy stdin to stdout with the letters a-z turned into A-Z.
# All other characters pass through unchanged.
function uppercase() {
  tr 'a-z' 'A-Z'
}

# Remove the script's temporary files.
# Globals read: COOKIE, CAPTCHA_IMAGE, HTML (paths; each may be empty/unset)
# Returns:      0, or the status of a failing rm
function clean_up()
{
  local tmp_file
  for tmp_file in "$COOKIE" "$CAPTCHA_IMAGE" "$HTML"; do
    # Quoted so paths containing whitespace are removed as one file;
    # "--" protects against a path that starts with a dash.
    if [ -n "$tmp_file" ]; then
      rm -f -- "$tmp_file"
    fi
  done
}

# Determine the name of the file to download and store it in FILE_NAME.
# Globals read:    HTML (path of the saved download page)
# Globals written: FILE_NAME
# A non-empty FILE_NAME on entry is kept and only trimmed. Otherwise the
# page is searched with four patterns in turn; the first one that yields a
# non-empty result wins. FILE_NAME stays empty when none matches.
# NOTE(review): the last three patterns capture a link (url=/href=/value=),
# so they presumably yield a full URL rather than a bare name - confirm.
function find_fileName()
{
  # markers[n] selects the line(s) of the page, extractors[n] is the sed
  # script that cuts the name out of them.
  local -a markers=(
    'Forum (BBCode)'
    'Forum (BBCode)'
    'Website (HTML)'
    'Text link'
  )
  local -a extractors=(
    's/.*File name: \(.*\) File size.*/\1/p'
    's/.*url=\(.*\)\]\[b\].*/\1/p'
    's/.*href=.\(.*\).>.b.File name.*/\1/p'
    's/.*value="\(.*\)".*/\1/p'
  )
  local idx

  for idx in "${!markers[@]}"; do
    if [ -n "$FILE_NAME" ]; then
      break
    fi
    FILE_NAME=$(grep -- "${markers[idx]}" "$HTML" | sed -n "${extractors[idx]}")
  done

  # Quoted: a name containing spaces must reach the helper as one argument,
  # otherwise everything after the first word is lost.
  FILE_NAME=$(strip_whiteSpace "$FILE_NAME")
}

# Count down for the given number of seconds, printing the remaining
# seconds on one line ("3 2 1 ") followed by a newline.
# Arguments: $1 - whole number of seconds; missing or non-numeric means 0
function delay()
{
  local remaining=${1:-0}

  # Anything that is not a plain non-negative integer counts as "no wait"
  # instead of triggering an arithmetic syntax error.
  if ! [[ $remaining =~ ^[0-9]+$ ]]; then
    remaining=0
  fi
  # Force base 10 so a value such as "030" is not interpreted as octal.
  remaining=$((10#$remaining))

  for (( ; remaining >= 1; remaining-- )); do
    printf '%s ' "$remaining"
    sleep 1
  done
  echo
}

# Print the number of seconds the site asks the user to wait.
# Globals read:    HTML (path of the saved response page)
# Globals written: WAIT_TIME
# Outputs:         the wait time on stdout; 3 when the page carries no
#                  "You need to wait ... seconds" notice
function check_waitTime()
{
  # Only the first matching line is considered.
  WAIT_TIME=$(sed -n 's/.*You need to wait \(.*\) seconds to start another download.*/\1/p' "$HTML" | head -n 1)

  # Keep the digits only, so surrounding whitespace or markup cannot reach
  # the caller, which feeds this value to an arithmetic countdown.
  WAIT_TIME=${WAIT_TIME//[^0-9]/}

  if [ -z "$WAIT_TIME" ]; then
    WAIT_TIME=3
  fi
  echo "$WAIT_TIME"
}

# Run one captcha round: request a reCAPTCHA challenge, download its image,
# obtain the answer from prompt_captcha.py and submit it to the site.
# Globals read:    FILESERVE_RECAPTCHA_PUBKEY, PUB_URL, CAPTCHA_IMAGE, AGENT,
#                  COOKIE, SHORTENCODE, RED, NC
# Globals written: CAPTCHA_URL, CHALLENGE, RESPONSE,
#                  RESULT (raw reply of checkReCaptcha.php)
# Returns:         exit status of the final curl call
# Every expansion is quoted: the URLs contain '?', which the shell would
# otherwise treat as a glob character.
function solve_recaptcha()
{
  CAPTCHA_URL="http://www.google.com/recaptcha/api/challenge?k=$FILESERVE_RECAPTCHA_PUBKEY"
  echo -e "$RED [*] captcha url: $NC $CAPTCHA_URL "

  # The challenge id is the single-quoted value on the "challenge" line.
  CHALLENGE=$(wget -q --referer "$PUB_URL" "$CAPTCHA_URL" -O - | grep challenge | sed -n "s/.*'\(.*\)',/\1/p")
  echo -e "$RED [*] captcha challenge: $NC $CHALLENGE"
  wget -q --referer "$PUB_URL" "http://www.google.com/recaptcha/api/image?c=$CHALLENGE" -O "$CAPTCHA_IMAGE"

  # Disabled alternative: show the image with $IMAGE_VIEWER and read the
  # answer from the terminal.
  #convert -trim $CAPTCHA_IMAGE $CAPTCHA_IMAGE
  #$IMAGE_VIEWER "$CAPTCHA_IMAGE" &
  #pid=$!
  #read -p "[*] enter captcha response: " RESPONSE
  #RESPONSE=$(echo $RESPONSE | sed 's/ /+/g')
  #disown $(kill -9 $pid) 2>&1 1>/dev/null

  echo -e -n "$RED [*] captcha input : $NC"
  # prompt_captcha.py is an external helper; its stdout is used as the
  # answer, with spaces turned into '+' for the form body.
  RESPONSE=$(prompt_captcha.py "$CAPTCHA_IMAGE" | sed 's/ /+/g')
  echo "$RESPONSE"

  RESULT=$(curl -s --user-agent "$AGENT" --cookie-jar "$COOKIE" --referer "$PUB_URL" http://www.fileserve.com/checkReCaptcha.php --data "recaptcha_challenge_field=$CHALLENGE&recaptcha_response_field=$RESPONSE&recaptcha_shortencode_field=$SHORTENCODE")
}


### - 0 check the arguments and normalise the URL
# Without a URL, print the usage text (with the built-in example URL) and
# stop.
if [ -z "$1" ]; then
  echo
  echo "[*] usage: ${0##*/} $PUB_URL"
  echo
  # 255 is the status bash reported for the former, out-of-range `exit -1`.
  exit 255
fi

# Trim the URL and rewrite host "fileserve.com" to "www.fileserve.com".
# "$1" is quoted so the URL is never word-split or glob-expanded.
PUB_URL="$(strip_whiteSpace "$1" | sed 's#//fileserve.com#//www.fileserve.com#g')"
echo -e "$RED [*] attempting to download: $CYAN $PUB_URL $NC"


### - 1
# The scratch files were just created, empty, by mktemp and are used as they
# are. Deleting them here (as an earlier version did via clean_up) would
# throw away mktemp's guarantee that the script itself created those paths.

### - 2 short code = path component that follows "file/" in the URL
SHORTENCODE=$(printf '%s\n' "$PUB_URL" | sed -n 's/.*file\/\(.*\)/\1/p' | cut -d '/' -f 1)
SHORTENCODE=$(strip_whiteSpace "$SHORTENCODE")

### - 3 save cookie
curl -s --user-agent "$AGENT" --cookie-jar "$COOKIE" --referer "$PUB_URL" "$PUB_URL" --data 'checkTimeLimit=check' --output "$HTML"

### - 4 imitating "slow download".click()
# The reply overwrites $HTML; the file name is later extracted from it.
curl -s --user-agent "$AGENT" --cookie "$COOKIE" --referer "$PUB_URL" "$PUB_URL" --data "recaptcha_shortencode_field=$SHORTENCODE" --output "$HTML"

### - 5 decide the output file name
# An optional second argument names the output file. The test must be -n:
# with the former -z the supplied name was always thrown away.
if [ -n "$2" ]; then
  FILE_NAME="$(strip_whiteSpace "$2")"
else
  FILE_NAME=""
fi

# Fills FILE_NAME from the downloaded page when it is still empty.
find_fileName

if [ -z "$FILE_NAME" ]; then
  echo "[*] can not find file-name, exiting ..."
  # 255 is the status bash reported for the former, out-of-range `exit -1`.
  exit 255
else
  echo -e "$RED [*] file-name found: $NC $FILE_NAME"
fi

### - 6 solve captcha
# Repeat until the server's reply reduces to "success".
RESULT="(null)"
# "$RESULT" must be quoted: an empty value would otherwise make `[` fail
# with a syntax error, end the loop and wrongly report the captcha as solved.
while [ "$RESULT" != "success" ]; do
  # solve_recaptcha returns the status of its final curl call; stop instead
  # of spinning when the request itself cannot be made.
  if ! solve_recaptcha; then
    echo "[*] captcha request failed, exiting ..." >&2
    exit 1
  fi
  # Reduce the reply ({ ... "text" ...}) to its last double-quoted string.
  RESULT=$(printf '%s\n' "$RESULT" | sed -n 's/{.*"\(.*\)".*/\1/p')
done
echo -e "$RED [*] captcha solved :)$NC"

### - 7 free-user delay
# The reply to downloadLink=wait is treated as the waiting time.
WAIT_TIME=$(curl -s --user-agent "$AGENT" --cookie "$COOKIE" --referer "$PUB_URL" "$PUB_URL" --data "downloadLink=wait" --output -)
echo -e "$RED [*] free-user delay: $NC $WAIT_TIME"

# Keep the digits only, fall back to 0 for an empty reply, and force base 10
# so leading zeros are not read as octal (replaces the former `bc` call).
WAIT_TIME=${WAIT_TIME//[^0-9]/}
WAIT_TIME=$((10#${WAIT_TIME:-0}))
delay "$WAIT_TIME"

# Plain command: wrapping it in $(...) would try to execute curl's output.
curl -s --user-agent "$AGENT" --cookie "$COOKIE" --referer "$PUB_URL" "$PUB_URL" --data "downloadLink=show" --output /dev/null


### - 8 final download ... should give a 302 redirection
echo -e "$RED [*] Starting download ...$NC"
# curl follows the redirect and saves the body as $FILE_NAME; --write-out
# reports the final URL, which sed extracts from between the brackets.
REDIR=$(curl --user-agent "$AGENT" --cookie "$COOKIE" --referer "$PUB_URL" "$PUB_URL" --data "download=normal" --location --location-trusted --output "$FILE_NAME" --write-out 'REDIRECT-URL:[%{url_effective}]' | sed -n 's/.*:\[\(.*\)\]/\1/p')

# The download is treated as successful when the final URL contains "/dl/".
if [[ "$REDIR" == */dl/* ]]; then
  echo -e "$RED [*] done :D. $NC"
  clean_up
  exit
fi

echo -e "$RED [*] incorrect redirection; delay then restart ...$NC"
# What was saved is not the file: keep it as $HTML so check_waitTime can
# look for the waiting notice in it.
mv -f -- "$FILE_NAME" "$HTML"
WAIT_TIME=$(check_waitTime)
delay "$WAIT_TIME"
# Remove this run's scratch files, then replace the process with a fresh
# run, passing on the optional output file name if one was given.
clean_up
exec "$0" "$PUB_URL" ${2:+"$2"}

4 comments:

Anonymous said...

Hi!

Thanks for posting your script!
I also downloaded your prompt_captcha.py and just tried fileserve.sh - however, PIL raises this:

IOError: cannot identify image file

Maybe fileserve's or recaptcha's api changed or something? Did I do something else wrong?
Thank you.

Anonymous said...

Thanks! Also requires the prompt_captcha.py script to work.

I think that a "-C" in the "### -8 final download"'s curl command could be useful, it would supposedly resume a download that was interrupted for some reason, but I'm not sure it would work at all.

Unknown said...

Hi, I don't think the resume feature (or -C, continue at a specific offset) is supported by Fileserve's free download service. It's only available for premium users. Therefore, if you are downloading as a free user and the download is interrupted, you will have to start over from the beginning of the file.

Unknown said...

"IOError: cannot identify image file" might be the result of an incorrectly downloaded captcha image. I am not sure if Fileserve has updated their HTML or changed their reCAPTCHA key (which I believe is unlikely). I will take a look and get back. Thanks for reporting it.

P.S. prompt_captcha.py should work with any regular image file. Can you just do "prompt_captcha.py any-image.{png,jpg}" and see if that works? Also, don't forget to make prompt_captcha.py executable.

Post a Comment