首页
学习
活动
专区
工具
TVP
发布
社区首页 >问答首页 >phantomjs不等待“完整”页面加载

phantomjs不等待“完整”页面加载
EN

Stack Overflow用户
提问于 2012-07-05 15:53:20
回答 13查看 152.4K关注 0票数 142

我正在使用PhantomJS v1.4.1加载一些网页。我不能访问他们的服务器端,我只能得到指向他们的链接。我使用的是过时的幻影版本,因为我需要在那些网页上支持Adobe Flash。

问题是许多网站正在异步加载他们的次要内容,这就是为什么幻影的onLoadFinished回调(类似于HTML语言中的onLoad )在不是所有内容都还没有加载的情况下过早地触发。有没有人可以建议我如何等待一个网页完全加载,例如,一个包含所有动态内容的屏幕截图,比如广告?

EN

回答 13

Stack Overflow用户

发布于 2013-02-07 18:36:54

另一种方法是只要求PhantomJS在页面加载后等待一段时间,然后再进行呈现,根据常规的rasterize.js示例,但需要更长的超时以允许JavaScript完成额外资源的加载:

代码语言:javascript
复制
page.open(address, function (status) {
    if (status !== 'success') {
        console.log('Unable to load the address!');
        phantom.exit();
    } else {
        window.setTimeout(function () {
            page.render(output);
            phantom.exit();
        }, 1000); // Change timeout as required to allow sufficient time 
    }
});
票数 77
EN

Stack Overflow用户

发布于 2013-02-07 18:32:31

您可以尝试waitfor和rasterize示例的组合:

代码语言:javascript
复制
/**
 * See https://github.com/ariya/phantomjs/blob/master/examples/waitfor.js
 * 
 * Wait until the test condition is true or a timeout occurs. Useful for waiting
 * on a server response or for a ui change (fadeIn, etc.) to occur.
 *
 * @param testFx javascript condition that evaluates to a boolean,
 * it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')" or
 * as a callback function.
 * @param onReady what to do when testFx condition is fulfilled,
 * it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')" or
 * as a callback function.
 * @param timeOutMillis the max amount of time to wait. If not specified, 3 sec is used.
 */
function waitFor(testFx, onReady, timeOutMillis) {
    var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000, //< Default Max Timout is 3s
        start = new Date().getTime(),
        condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()), //< defensive code
        interval = setInterval(function() {
            if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) {
                // If not time-out yet and condition not yet fulfilled
                condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code
            } else {
                if(!condition) {
                    // If condition still not fulfilled (timeout but condition is 'false')
                    console.log("'waitFor()' timeout");
                    phantom.exit(1);
                } else {
                    // Condition fulfilled (timeout and/or condition is 'true')
                    console.log("'waitFor()' finished in " + (new Date().getTime() - start) + "ms.");
                    typeof(onReady) === "string" ? eval(onReady) : onReady(); //< Do what it's supposed to do once the condition is fulfilled
                    clearInterval(interval); //< Stop this interval
                }
            }
        }, 250); //< repeat check every 250ms
};

var page = require('webpage').create(), system = require('system'), address, output, size;

if (system.args.length < 3 || system.args.length > 5) {
    console.log('Usage: rasterize.js URL filename [paperwidth*paperheight|paperformat] [zoom]');
    console.log('  paper (pdf output) examples: "5in*7.5in", "10cm*20cm", "A4", "Letter"');
    phantom.exit(1);
} else {
    address = system.args[1];
    output = system.args[2];
    if (system.args.length > 3 && system.args[2].substr(-4) === ".pdf") {
        size = system.args[3].split('*');
        page.paperSize = size.length === 2 ? {
            width : size[0],
            height : size[1],
            margin : '0px'
        } : {
            format : system.args[3],
            orientation : 'portrait',
            margin : {
                left : "5mm",
                top : "8mm",
                right : "5mm",
                bottom : "9mm"
            }
        };
    }
    if (system.args.length > 4) {
        page.zoomFactor = system.args[4];
    }
    var resources = [];
    page.onResourceRequested = function(request) {
        resources[request.id] = request.stage;
    };
    page.onResourceReceived = function(response) {
        resources[response.id] = response.stage;
    };
    page.open(address, function(status) {
        if (status !== 'success') {
            console.log('Unable to load the address!');
            phantom.exit();
        } else {
            waitFor(function() {
                // Check in the page if a specific element is now visible
                for ( var i = 1; i < resources.length; ++i) {
                    if (resources[i] != 'end') {
                        return false;
                    }
                }
                return true;
            }, function() {
               page.render(output);
               phantom.exit();
            }, 10000);
        }
    });
}
票数 21
EN

Stack Overflow用户

发布于 2016-07-20 04:40:02

这是一个等待所有资源请求完成的解决方案。一旦完成,它将把页面内容记录到控制台,并生成呈现页面的屏幕截图。

虽然这个解决方案可以作为一个很好的起点,但我观察到它失败了,所以它绝对不是一个完整的解决方案!

我使用document.readyState的时候运气不是很好。

我受到了phantomjs examples page上的waitfor.js示例的影响。

代码语言:javascript
复制
var system = require('system');
var webPage = require('webpage');

var page = webPage.create();
var url = system.args[1];

page.viewportSize = {
  width: 1280,
  height: 720
};

var requestsArray = [];

page.onResourceRequested = function(requestData, networkRequest) {
  requestsArray.push(requestData.id);
};

page.onResourceReceived = function(response) {
  var index = requestsArray.indexOf(response.id);
  if (index > -1 && response.stage === 'end') {
    requestsArray.splice(index, 1);
  }
};

page.open(url, function(status) {

  var interval = setInterval(function () {

    if (requestsArray.length === 0) {

      clearInterval(interval);
      var content = page.content;
      console.log(content);
      page.render('yourLoadedPage.png');
      phantom.exit();
    }
  }, 500);
});
票数 16
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/11340038

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档