我正在尝试使用 Puppeteer 和 MutationObserver 来监测一个网站的变化。
为此,我必须通过 page.evaluate() 执行 JavaScript 代码,把 MutationObserver 对象注入到页面中。
问题在于:每当页面重新加载后,我注入的 MutationObserver 就不复存在了。
我需要检测页面何时重新加载,以便再次注入 MutationObserver。
const puppeteer = require('puppeteer-extra')
const StealthPlugin = require('puppeteer-extra-plugin-stealth')
puppeteer.use(StealthPlugin())
// Launch Puppeteer, open the page and attach a MutationObserver to it.
puppeteer.launch({ headless: false }).then(async browser => {
  console.log('Running script..');
  const page = await browser.newPage();
  await page.goto('mywebsite');
  // Wait until an element matching the selector is present
  await page.waitForSelector('my-selector');
  // Expose a Node-side logger that code running inside the page can call
  await page.exposeFunction('puppeteerLogMutation', (param1) => {
    console.log('----NEW MUTATION----');
    console.log(param1);
  });
  // Inject the MutationObserver. page.evaluate() runs once, in the document
  // that is loaded right now, so the observer is gone after a page reload.
  await page.evaluate(() => {
    // NOTE(review): assumes the node to observe is the element waited for
    // above — confirm. The original snippet used `target` without defining it.
    const target = document.querySelector('my-selector');
    const observer = new MutationObserver((mutationList) => {
      mutationList.forEach((mutation) => {
        // Do something with the detected mutation:
        // here its type is forwarded to the Node (Puppeteer) context
        puppeteerLogMutation(mutation.type);
      });
    });
    // MutationObserver configuration: watch attributes, child nodes and text
    // content in the whole subtree, and keep the previous values
    const observerOptions = {
      attributes: true,
      childList: true,
      subtree: true,
      characterData: true,
      attributeOldValue: true,
      characterDataOldValue: true
    };
    // Start observing
    observer.observe(target, observerOptions);
  });
});
显然,页面重新加载后,我注入的观察者就失效了,所以我想检测页面何时重新加载,以便再次注入观察者。
谢谢。
发布于 2022-10-03 13:15:13
下面的代码会在 URL 'https://www.google.com/' 对应的页面中插入一个按钮;
页面重新加载后,该按钮会被再次添加。注意:如果连续快速地反复刷新页面,会因 NavigationError 导致浏览器崩溃。
const puppeteer = require('puppeteer-core');
// Launch settings passed to puppeteer.launch(): a visible, maximized Chrome window.
const launchOptions = {
  headless: false,
  // puppeteer-core is used, so the browser executable must be named explicitly
  executablePath: String.raw`C:\Program Files\Google\Chrome\Application\chrome.exe`,
  args: ['--start-maximized'],
};
// Stylesheet for the injected button, assembled one line per entry.
// The leading empty entry keeps the newline that precedes <style>.
const BTN_CSS = [
  '',
  '<style>',
  '.btn-next-url {',
  'position: absolute;',
  'top: 50px;',
  'left: 70px;',
  'height: 30px;',
  'width: 70px;',
  'font-size: 20px;',
  'font-weight: bold;',
  'color: white;',
  'background-color: red;',
  'border: none;',
  'border-radius: 5px;',
  'cursor: pointer;',
  'opacity: 50%;',
  '}',
  '.btn-next-url:hover {',
  'opacity: 100%',
  '}',
  '</style>',
].join('\n');

// Markup that gets inserted into the page: the stylesheet above followed by
// the button element, which shows an alert when clicked.
const BTN_HTML = [
  BTN_CSS,
  `<button id="next-page-btn" class="btn-next-url" title="navigate to next url" onclick="( () => alert('button clicked') )()">>></button>`,
].join('\n');
/**
 * Launches the browser, registers the button injection for every new
 * document, opens Google, keeps the browser open for 15 seconds and closes it.
 *
 * @returns {Promise<void>} Settles after the browser has been closed; rejects
 *   with the original error if any step fails.
 */
async function runTest() {
  const browser = await puppeteer.launch(launchOptions);
  try {
    const page = (await browser.pages())[0]; // use the default tab
    await page.setViewport({ width: 1920, height: 1080 });
    // expose a function in page context (the function will be executed in nodejs context but can be called from inside page context)
    await page.exposeFunction("insertButtonInPage", () => insertButtonInPage(page));
    // function provided here will be executed in the page context
    await page.evaluateOnNewDocument(() => insertButtonInPage()); // the button is added in page here
    await page.goto('https://www.google.com/');
    await delay(15000);
  } finally {
    // Close the browser even when a step above throws (e.g. a navigation
    // error), so no browser process is left running.
    await browser.close();
  }
}
/**
 * Inserts the button markup (BTN_HTML) at the start of the page's <body>.
 *
 * @param {object} page - Puppeteer page to modify.
 * @returns {Promise<void>} Resolves once the markup has been inserted; rejects
 *   if waiting for <body> or the in-page insertion fails.
 */
async function insertButtonInPage(page) {
  await page.waitForSelector('body');
  // Await the in-page call so callers only continue after the button exists
  // and so a failure rejects this function instead of going unhandled.
  await page.$eval('body', (body, btn_html) => { // adds a button in the page
    body.insertAdjacentHTML('afterbegin', btn_html);
  }, BTN_HTML);
}
// Returns a promise that resolves after `ms` milliseconds.
const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

// Start the demo. Report a failure and set a non-zero exit code instead of
// leaving the rejection unhandled.
runTest().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
重要的部分是:
await page.exposeFunction("insertButtonInPage", () => insertButtonInPage(page));
这一行通过 Page.exposeFunction() 公开了当前用于在页面中添加按钮的函数 insertButtonInPage(),
使它可以在页面上下文中被调用(函数本身仍在 Node.js 上下文中执行)。
接下来是对Page.evaluateOnNewDocument()的调用
await page.evaluateOnNewDocument( () => insertButtonInPage());
这会添加一个在页面上下文中执行的函数,该函数将在以下任一情况下被调用:页面发生导航时;子框架被附加或发生导航时。
该函数在文档创建之后、文档中任何脚本运行之前被调用。
https://stackoverflow.com/questions/64434289
复制相似问题