I want to scrape a website containing an Excel file that I want to download. The download part works, but when I open the file I see a "forbidden" message saying that downloading of the file is not allowed.
// CasperJS script (bootstrapped from PhantomJS) that opens the site's /nl/
// page, reads the href of the `.xlslink` element and tries to download it.
//
// NOTE: this is the non-working version from the question. Two things are
// intentionally left as the author had them, because they are the subject of
// the post:
//   1. `verbose` is nested inside `pageSettings` instead of being a top-level
//      Casper option, so no errors were being shown.
//   2. `download()` is passed `linkPath` (the URL up to and including the last
//      '/', i.e. without the file name) instead of the URL of the file itself.

var page = require('webpage').create(); // not referenced again in this script
var domain = 'http://www.fagg-afmps.be';
var fs = require('fs');

// Tell PhantomJS where CasperJS lives and load its bootstrap file.
phantom.casperPath = 'c:/casperjs';
phantom.injectJs('c:/casperjs/bin/bootstrap.js');

// Backslashes must be escaped: in a JS string literal "\p" and "\i" collapse
// to "p" and "i", which would silently produce the wrong directory.
fs.changeWorkingDirectory("c:\\projects\\inactive medicins");

var casper = require('casper').create({
    pageSettings: {
        webSecurityEnabled: false,
        verbose: true // misplaced: see note 1 in the header comment
    }
});

casper.userAgent("mozilla/5.0 (windows nt 6.3; win64; x64) applewebkit/537.36 (khtml, gecko) chrome/37.0.2049.0 safari/537.36");

casper.start(domain + '/nl/');

casper.then(function () {
    // Runs inside the loaded page; assumes the page itself provides jQuery as `$`.
    var xlsLinkOriginal = this.evaluate(function () {
        return $('.xlslink').attr('href');
    });

    // Split the href into "everything up to the last '/'" and the file name.
    var lastSlash = xlsLinkOriginal.lastIndexOf('/');
    var filename = xlsLinkOriginal.substring(lastSlash + 1);
    var linkPath = domain + xlsLinkOriginal.substring(0, lastSlash + 1);

    try {
        // File information
        this.echo('filename: ' + unescape(filename));
        this.echo('path: ' + linkPath + unescape(filename));
        // See note 2 in the header comment: linkPath has no file name on it.
        this.download(linkPath, unescape(filename));
    } catch (e) {
        this.echo(e);
    }
});

casper.run(function () {
    this.echo('done.').exit();
});
Do I need to add some headers to get this working, or is the problem something else?
Sorry guys, I made a mistake. I had put `verbose` in the wrong place, so I didn't see the errors. Here is the working code:
// CasperJS script (bootstrapped from PhantomJS) that opens the site's /nl/
// page, reads the href of the `.xlslink` element and downloads that file to
// "test.xls" in the working directory.

var page = require('webpage').create(); // not referenced again in this script
var domain = 'http://www.fagg-afmps.be';
var fs = require('fs');

// Tell PhantomJS where CasperJS lives and load its bootstrap file.
phantom.casperPath = 'c:/casperjs';
phantom.injectJs('c:/casperjs/bin/bootstrap.js');

// Working directory. Backslashes must be escaped: in a JS string literal "\p"
// and "\i" collapse to "p" and "i", which would silently produce the wrong path.
fs.changeWorkingDirectory("c:\\projects\\inactive medicins");

var casper = require('casper').create({
    verbose: true,
    logLevel: 'debug', // needed for debugging
    pageSettings: {
        webSecurityEnabled: false
    }
});

casper.userAgent("mozilla/5.0 (windows nt 6.3; win64; x64) applewebkit/537.36 (khtml, gecko) chrome/37.0.2049.0 safari/537.36");

casper.start(domain + '/nl/');

casper.then(function () {
    // Runs inside the loaded page; assumes the page itself provides jQuery as `$`.
    var xlsLink = this.evaluate(function () {
        return $('.xlslink').attr('href');
    });

    // Without this guard a missing link would raise a TypeError on
    // `.substring` below, outside the try/catch.
    if (!xlsLink) {
        this.echo('no .xlslink href found on page');
        return;
    }

    // Split the href into "everything up to the last '/'" and the file name.
    // These two values are only echoed for information.
    var lastSlash = xlsLink.lastIndexOf('/');
    var filename = xlsLink.substring(lastSlash + 1);
    var linkPath = domain + xlsLink.substring(0, lastSlash + 1);

    try {
        // File information
        this.echo('filename: ' + unescape(filename));
        this.echo('path: ' + linkPath + unescape(filename));

        // This was the mistake: download() needs the URL of the file to
        // download, and the path where it should be stored on this computer.
        this.download(xlsLink, fs.workingDirectory + "/test.xls");
    } catch (e) {
        this.echo(e);
    }
});

casper.run(function () {
    this.echo('done.').exit();
});
Comments
Post a Comment