My Meaningless Scribbles ...: Download Fileserve links from shell

The script assumes you have

ImageMagick
Curl
Common Unix utilities (eg. sed, awk, cut etc.)

... installed and available in your PATH. The captcha is shown using the 'display' component of ImageMagick. You need X forwarding if you are running the script over SSH.

Download source here: http://gsbabil.googlecode.com/svn-history/r8/trunk/fileserve.sh


#!/bin/bash
# Configuration shared by the rest of the script: target URL, reCAPTCHA key,
# temporary files, HTTP client identity and terminal colours.
#source /home/babil/Desktop/recaptcha/plowshare-0.9.4/src/lib.sh

# Sample public link; printed in the usage message and overwritten with the
# first command-line argument once one is supplied.
PUB_URL='http://www.fileserve.com/file/zXvrJS2'
# Key appended to the reCAPTCHA challenge URL.
FILESERVE_RECAPTCHA_PUBKEY='6LdSvrkSAAAAAOIwNj-IY-Q-p90hQrLinRIpZBPi'
IMAGE_VIEWER="display"

# Scratch files. mktemp is checked explicitly: continuing with an empty path
# would make every later curl/wget/rm call operate on the wrong target.
CAPTCHA_IMAGE=$(mktemp) || { echo "[*] mktemp failed, exiting ..." >&2; exit 1; } # formerly 'captcha.jpg'
COOKIE=$(mktemp) || { echo "[*] mktemp failed, exiting ..." >&2; exit 1; } # formerly 'tmp_cookie.txt'
HTML=$(mktemp) || { echo "[*] mktemp failed, exiting ..." >&2; exit 1; } # formerly 'tmp_fileserve.htm'

AGENT='Mozilla/5.0 (X11; U; Linux x86_64; en-AU; rv:1.9.2.12) Gecko/20101027 Ubuntu/10.10 (maverick) Firefox/3.6.12'
# NOTE(review): the two header strings below are not referenced by any request
# in this script. They embed literal double quotes, so expanding them unquoted
# on a curl command line would not produce the intended arguments; convert
# them to arrays before using them.
CUSTOM_HEADERS='-H "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" -H "Accept-Language: en-us,en;q=0.5" -H "Accept-Encoding: gzip,deflate" -H "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7" -H "Keep-Alive: 115" -H "Connection: keep-alive"'
XML_HTTP_REQUEST='-H "X-Requested-With: XMLHttpRequest"'

# Escape sequences interpreted by `echo -e`.
RED='\e[0;31m'
CYAN='\e[0;36m'
NC='\e[0m' # No Color


# Replace every match of $1 with $2 in the text read from stdin and write the
# result to stdout.
# '/', '\' and '&' in both arguments are backslash-escaped first so they cannot
# terminate or alter the generated sed expression; any other regular-expression
# metacharacter in $1 keeps its meaning to sed.
function regex_replace() {
    # Scratch values stay local so callers' variables are not clobbered.
    local escape_re='s/\(\/\|\\\|&\)/\\&/g'
    local from to
    from=$(sed -e "$escape_re" <<< "$1")
    to=$(sed -e "$escape_re" <<< "$2")
    sed -e "s/$from/$to/g"
}

# Delete leading and trailing spaces, tabs, \r, ... from $1 and print the
# result on stdout. printf is used instead of echo so that arguments such as
# "-n" or "-e" are printed rather than interpreted as echo options.
function strip_whiteSpace() {
    printf '%s\n' "$1" | sed 's/^[[:space:]]*//; s/[[:space:]]*$//'
}

# Filter: copy stdin to stdout with the letters a-z converted to A-Z.
# Every other character passes through unchanged.
uppercase() {
    tr 'a-z' 'A-Z'
}

# Remove the temporary files named by the globals COOKIE, CAPTCHA_IMAGE and
# HTML. Paths are quoted so names containing whitespace or glob characters are
# removed as-is; unset or empty variables are skipped.
function clean_up()
{
 local path
 for path in "$COOKIE" "$CAPTCHA_IMAGE" "$HTML"
 do
  if [ -n "$path" ]
  then
   rm -f -- "$path"
  fi
 done
}

# Determine the name of the file being downloaded and store it in the global
# FILE_NAME.
# Globals: HTML (read)      - path of the saved download page
#          FILE_NAME (r/w)  - kept if already non-empty, otherwise filled in
# The saved page is searched with four patterns in turn; the first one that
# yields a non-empty value wins. Only the first matching line is used, and the
# value is quoted throughout so names containing spaces survive intact.
function find_fileName()
{
 # 1) "File name: ... File size" text on the 'Forum (BBCode)' line
 if [ -z "$FILE_NAME" ]
 then
  FILE_NAME=$(grep 'Forum (BBCode)' "$HTML" | sed -n 's/.*File name: \(.*\) File size.*/\1/p' | head -n 1)
 fi

 # 2) url= target on the 'Forum (BBCode)' line
 if [ -z "$FILE_NAME" ]
 then
  FILE_NAME=$(grep 'Forum (BBCode)' "$HTML" | sed -n 's/.*url=\(.*\)\]\[b\].*/\1/p' | head -n 1)
 fi

 # 3) href target on the 'Website (HTML)' line
 if [ -z "$FILE_NAME" ]
 then
  FILE_NAME=$(grep 'Website (HTML)' "$HTML" | sed -n 's/.*href=.\(.*\).>.b.File name.*/\1/p' | head -n 1)
 fi

 # 4) value="..." attribute on the 'Text link' line
 if [ -z "$FILE_NAME" ]
 then
  FILE_NAME=$(grep 'Text link' "$HTML" | sed -n 's/.*value="\(.*\)".*/\1/p' | head -n 1)
 fi

 FILE_NAME=$(strip_whiteSpace "$FILE_NAME")
}

# Count down from $1 to 1, printing each number followed by a space and
# sleeping one second per step, then print a newline.
# Arguments: $1 - number of seconds; must consist of decimal digits only.
# Returns:   1 (with a message on stderr) if $1 is empty or not all digits.
# The argument is validated before it reaches arithmetic evaluation because
# callers derive it from server responses, and it is forced to base 10 so a
# value such as "08" is not rejected as invalid octal.
function delay()
{
 local i
 case "$1" in
  ''|*[!0-9]*)
   echo "delay: expected a non-negative integer, got '$1'" >&2
   return 1
   ;;
 esac

 for ((i = 10#$1; i >= 1; i--))
 do
  printf '%s ' "$i"
  sleep 1
 done
 echo
}

# Print the number of seconds the saved page asks the user to wait.
# Globals: HTML (read)        - path of the saved page
#          WAIT_TIME (written) - same value that is printed
# The value is taken from the first "You need to wait N seconds to start
# another download" line. If no such line exists, or the captured text is not
# purely decimal digits, 3 is used instead: the result is later fed to an
# arithmetic loop, so only digits are allowed through.
function check_waitTime()
{
 WAIT_TIME=$(sed -n 's/.*You need to wait \(.*\) seconds to start another download.*/\1/p' "$HTML" | head -n 1)
 WAIT_TIME=$(strip_whiteSpace "$WAIT_TIME")
 case "$WAIT_TIME" in
  ''|*[!0-9]*) WAIT_TIME=3 ;;
 esac
 echo "$WAIT_TIME"
}

# Run one captcha round: fetch a reCAPTCHA challenge, download its image, ask
# prompt_captcha.py for the answer and submit it to Fileserve.
# Globals: FILESERVE_RECAPTCHA_PUBKEY, PUB_URL, CAPTCHA_IMAGE, COOKIE, AGENT,
#          SHORTENCODE, RED, NC (read)
#          RESULT (written) - raw response body of checkReCaptcha.php; the
#                             caller decides whether it means success
# Requires wget, curl and prompt_captcha.py on PATH.
# All URLs and paths are quoted: the image URL contains '?', which an unquoted
# expansion would expose to pathname globbing.
function solve_recaptcha()
{
 local captcha_url challenge response

 captcha_url="http://www.google.com/recaptcha/api/challenge?k=$FILESERVE_RECAPTCHA_PUBKEY"
 echo -e "$RED [*] captcha url: $NC $captcha_url "

 # The challenge token is the single-quoted value on the "challenge" line.
 challenge=$(wget -q --referer "$PUB_URL" "$captcha_url" -O - | grep challenge | sed -n "s/.*'\(.*\)',/\1/p")
 echo -e "$RED [*] captcha challenge: $NC $challenge"
 wget -q --referer "$PUB_URL" "http://www.google.com/recaptcha/api/image?c=$challenge" -O "$CAPTCHA_IMAGE"

 # Earlier interactive variant, kept for reference:
 #convert -trim $CAPTCHA_IMAGE $CAPTCHA_IMAGE
 #$IMAGE_VIEWER "$CAPTCHA_IMAGE" &
 #pid=$!
 #read -p "[*] enter captcha response: " RESPONSE
 #RESPONSE=$(echo $RESPONSE | sed 's/ /+/g')
 #disown $(kill -9 $pid) 2>&1 1>/dev/null

 echo -e -n "$RED [*] captcha input : $NC"
 # Spaces become '+' so the answer can be placed in the form-encoded body.
 response=$(prompt_captcha.py "$CAPTCHA_IMAGE" | sed 's/ /+/g')
 echo "$response"

 RESULT=$(curl -s --user-agent "$AGENT" --cookie-jar "$COOKIE" --referer "$PUB_URL" "http://www.fileserve.com/checkReCaptcha.php" --data "recaptcha_challenge_field=$challenge&recaptcha_response_field=$response&recaptcha_shortencode_field=$SHORTENCODE")
}


### Main flow
# Usage: <script> <fileserve-url> [output-file-name]
# Steps: validate arguments, derive the short code from the URL, obtain a
# session cookie, request the slow-download page, work out the output file
# name, solve the captcha, sit out the free-user delay, then download. If the
# final request does not end on a /dl/ URL the script waits and restarts.

### - 0
# Remove the temporary files on every exit path, including early errors.
trap clean_up EXIT

if [ -z "$1" ]
then
 echo
 echo "[*] usage: $(basename "$0") $PUB_URL"
 echo
 exit 1
else
 PUB_URL="$(strip_whiteSpace "$1" | sed 's#//fileserve.com#//www.fileserve.com#g')"
fi
echo -e "$RED [*] attempting to download: $CYAN $PUB_URL $NC"


### - 1
# Nothing to remove here: the temporary files were only just created and each
# one is overwritten by the request that fills it. Cleanup is done by the EXIT
# trap installed above.

### - 2
# The short code is the path component that follows "file/".
SHORTENCODE=$(printf '%s\n' "$PUB_URL" | sed -n 's/.*file\/\(.*\)/\1/p' | cut -d '/' -f 1)
SHORTENCODE=$(strip_whiteSpace "$SHORTENCODE")

### - 3 save cookie
curl -s --user-agent "$AGENT" --cookie-jar "$COOKIE" --referer "$PUB_URL" "$PUB_URL" --data 'checkTimeLimit=check' --output "$HTML"

### - 4 imitating "slow download".click()
curl -s --user-agent "$AGENT" --cookie "$COOKIE" --referer "$PUB_URL" "$PUB_URL" --data "recaptcha_shortencode_field=$SHORTENCODE" --output "$HTML"

### - 5
# A file name given as the second argument takes precedence; otherwise
# find_fileName extracts one from the saved page.
if [ -n "$2" ]
then
 FILE_NAME="$(strip_whiteSpace "$2")"
else
 FILE_NAME=""
fi
find_fileName
if [ -z "$FILE_NAME" ]
then
 echo "[*] can not find file-name, exiting ..."
 exit 1
else
 echo -e "$RED [*] file-name found: $NC $FILE_NAME"
fi

### - 6 solve captcha
# Keep asking until the last double-quoted token of the response is "success".
# The comparison is quoted so an empty or multi-word result means "try again"
# instead of making the test itself fail and ending the loop.
RESULT="(null)"
while [ "$RESULT" != "success" ]
do
 solve_recaptcha
 # Fold line breaks into spaces so the response is matched as one line.
 RESULT=$(printf '%s' "$RESULT" | tr '\r\n' '  ' | sed -n 's/{.*"\(.*\)".*/\1/p')
done
echo -e "$RED [*] captcha solved :)$NC"

### - 7 free-user delay
WAIT_TIME=$(curl -s --user-agent "$AGENT" --cookie "$COOKIE" --referer "$PUB_URL" "$PUB_URL" --data "downloadLink=wait" --output -)
echo -e "$RED [*] free-user delay: $NC $WAIT_TIME"
# Keep the digits only, read them as base 10, and treat "no digits" as zero.
WAIT_TIME=${WAIT_TIME//[^0-9]/}
WAIT_TIME=$((10#${WAIT_TIME:-0}))
delay "$WAIT_TIME"
# The response body is not needed, and must not be executed as a command.
curl -s --user-agent "$AGENT" --cookie "$COOKIE" --referer "$PUB_URL" "$PUB_URL" --data "downloadLink=show" --output /dev/null


### - 8 final download ... should give a 302 redirection
echo -e "$RED [*] Starting download ...$NC"
REDIR=$(curl --user-agent "$AGENT" --cookie "$COOKIE" --referer "$PUB_URL" "$PUB_URL" --data "download=normal" --location --location-trusted --output "$FILE_NAME" --write-out 'REDIRECT-URL:[%{url_effective}]' | sed -n 's/.*:\[\(.*\)\]/\1/p')

if [[ "$REDIR" != */dl/* ]]
then
 echo -e "$RED [*] incorrect redirection; delay then restart ...$NC"
 # What was saved is the error page, not the file: move it to the scratch
 # HTML file so check_waitTime can read the wait time from it.
 mv -f -- "$FILE_NAME" "$HTML"
 WAIT_TIME=$(check_waitTime)
 delay "$WAIT_TIME"
 # exec replaces this process, so the EXIT trap will not run; clean up first.
 # The optional file name is passed along so the restart honours it too.
 clean_up
 exec "$0" "$PUB_URL" "$2"
else
 echo -e "$RED [*] done :D. $NC"
 clean_up
 exit
fi

4 comments:

Anonymous said...: Hi!

Thanks for posting your script!
I also downloaded your prompt_captcha.py and just tried fileserve.sh - however, PIL raises this:

IOError: cannot identify image file

Maybe fileserve's or recaptcha's api changed or something? Did I do something else wrong?
Thank you.; February 14, 2011 at 9:24 AM
Anonymous said...: Thanks! Also requires the prompt_captcha.py script to work.

I think that a "-C" in the "### -8 final download"'s curl command could be useful, it would supposedly resume a download that was interrupted for some reason, but I'm not sure it would work at all.; June 23, 2011 at 5:50 AM
Unknown said...: Hi, I don't think resume feature (or -C, continue at a specific offset) is supported by Fileserve's free download service. It's only available for premium users. Therefore, if you are downloading as a free user and downloading is interrupted, you will have to start over from the beginning of the file.; August 24, 2011 at 12:44 PM
Unknown said...: "IOError: cannot identify image file" might be the result of an incorrectly downloaded captcha image. I am not sure if Fileserve has updated their HTML or changed their reCAPTCHA key (which I believe is unlikely). I will take a look and get back. Thanks for reporting it.

P.S. prompt_captcha.py should work with any regular image file. Can you just do "prompt_captcha.py any-image.{png,jpg}" and see if that works? Also, don't forget to make prompt_captcha.py executable.; August 24, 2011 at 12:48 PM

My Meaningless Scribbles ...

Monday, November 22, 2010

Download Fileserve links from shell

4 comments:

Post a Comment

My Links

Blog Archive

Labels

My Visitors