我正在使用PhantomJS v1.4.1加载一些网页。我不能访问他们的服务器端,我只能得到指向他们的链接。我使用的是过时的幻影版本,因为我需要在那些网页上支持Adobe Flash。
问题是许多网站正在异步加载他们的次要内容,这就是为什么幻影的onLoadFinished回调(类似于HTML语言中的onLoad )在不是所有内容都还没有加载的情况下过早地触发。有没有人可以建议我如何等待一个网页完全加载,例如,一个包含所有动态内容的屏幕截图,比如广告?
发布于 2013-02-07 18:36:54
另一种方法是只要求PhantomJS在页面加载后等待一段时间,然后再进行呈现,根据常规的rasterize.js示例,但需要更长的超时以允许JavaScript完成额外资源的加载:
page.open(address, function (status) {
if (status !== 'success') {
console.log('Unable to load the address!');
phantom.exit();
} else {
window.setTimeout(function () {
page.render(output);
phantom.exit();
}, 1000); // Change timeout as required to allow sufficient time
}
});
发布于 2013-02-07 18:32:31
您可以尝试waitfor和rasterize示例的组合:
/**
* See https://github.com/ariya/phantomjs/blob/master/examples/waitfor.js
*
* Wait until the test condition is true or a timeout occurs. Useful for waiting
* on a server response or for a ui change (fadeIn, etc.) to occur.
*
* @param testFx javascript condition that evaluates to a boolean,
* it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')" or
* as a callback function.
* @param onReady what to do when testFx condition is fulfilled,
* it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')" or
* as a callback function.
* @param timeOutMillis the max amount of time to wait. If not specified, 3 sec is used.
*/
function waitFor(testFx, onReady, timeOutMillis) {
var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000, //< Default Max Timout is 3s
start = new Date().getTime(),
condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()), //< defensive code
interval = setInterval(function() {
if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) {
// If not time-out yet and condition not yet fulfilled
condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code
} else {
if(!condition) {
// If condition still not fulfilled (timeout but condition is 'false')
console.log("'waitFor()' timeout");
phantom.exit(1);
} else {
// Condition fulfilled (timeout and/or condition is 'true')
console.log("'waitFor()' finished in " + (new Date().getTime() - start) + "ms.");
typeof(onReady) === "string" ? eval(onReady) : onReady(); //< Do what it's supposed to do once the condition is fulfilled
clearInterval(interval); //< Stop this interval
}
}
}, 250); //< repeat check every 250ms
};
var page = require('webpage').create(), system = require('system'), address, output, size;
if (system.args.length < 3 || system.args.length > 5) {
console.log('Usage: rasterize.js URL filename [paperwidth*paperheight|paperformat] [zoom]');
console.log(' paper (pdf output) examples: "5in*7.5in", "10cm*20cm", "A4", "Letter"');
phantom.exit(1);
} else {
address = system.args[1];
output = system.args[2];
if (system.args.length > 3 && system.args[2].substr(-4) === ".pdf") {
size = system.args[3].split('*');
page.paperSize = size.length === 2 ? {
width : size[0],
height : size[1],
margin : '0px'
} : {
format : system.args[3],
orientation : 'portrait',
margin : {
left : "5mm",
top : "8mm",
right : "5mm",
bottom : "9mm"
}
};
}
if (system.args.length > 4) {
page.zoomFactor = system.args[4];
}
var resources = [];
page.onResourceRequested = function(request) {
resources[request.id] = request.stage;
};
page.onResourceReceived = function(response) {
resources[response.id] = response.stage;
};
page.open(address, function(status) {
if (status !== 'success') {
console.log('Unable to load the address!');
phantom.exit();
} else {
waitFor(function() {
// Check in the page if a specific element is now visible
for ( var i = 1; i < resources.length; ++i) {
if (resources[i] != 'end') {
return false;
}
}
return true;
}, function() {
page.render(output);
phantom.exit();
}, 10000);
}
});
}
发布于 2016-07-20 04:40:02
这是一个等待所有资源请求完成的解决方案。一旦完成,它将把页面内容记录到控制台,并生成呈现页面的屏幕截图。
虽然这个解决方案可以作为一个很好的起点,但我观察到它失败了,所以它绝对不是一个完整的解决方案!
我使用document.readyState
的时候运气不是很好。
我受到了phantomjs examples page上的waitfor.js示例的影响。
var system = require('system');
var webPage = require('webpage');
var page = webPage.create();
var url = system.args[1];
page.viewportSize = {
width: 1280,
height: 720
};
var requestsArray = [];
page.onResourceRequested = function(requestData, networkRequest) {
requestsArray.push(requestData.id);
};
page.onResourceReceived = function(response) {
var index = requestsArray.indexOf(response.id);
if (index > -1 && response.stage === 'end') {
requestsArray.splice(index, 1);
}
};
page.open(url, function(status) {
var interval = setInterval(function () {
if (requestsArray.length === 0) {
clearInterval(interval);
var content = page.content;
console.log(content);
page.render('yourLoadedPage.png');
phantom.exit();
}
}, 500);
});
https://stackoverflow.com/questions/11340038
复制相似问题