Skip to content

Commit

Permalink
feat(web-page): disable browser cache and selectively proxy requests …
Browse files Browse the repository at this point in the history
…to bypass CSP/CORS restrictions
  • Loading branch information
azasypkin committed Dec 9, 2023
1 parent f9507eb commit 6825861
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 38 deletions.
88 changes: 71 additions & 17 deletions src/api/web_page/content/get.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@ import { mock, test } from 'node:test';
import type { Browser } from 'playwright/index.js';

import { registerWebPageContentGetRoutes } from './get.js';
import { createBrowserMock, createPageMock, createWindowMock } from '../../../mocks.js';
import {
createBrowserContextMock,
createBrowserMock,
createCDPSessionMock,
createPageMock,
createWindowMock,
} from '../../../mocks.js';
import { createMock } from '../../api_route_params.mocks.js';

await test('[/api/web_page/content] can successfully create route', () => {
Expand All @@ -15,15 +21,16 @@ await test('[/api/web_page/content] can extract content', async (t) => {
t.mock.method(Date, 'now', () => 123000);

const windowMock = createWindowMock();

const pageMock = createPageMock({
window: windowMock,
responses: [],
content: '<body><div>Hello Secutils.dev and world!</div><div>Hello World</div></body>',
});
const cdpSessionMock = createCDPSessionMock();
const browserContextMock = createBrowserContextMock(pageMock, cdpSessionMock);

const response = await registerWebPageContentGetRoutes(
createMock({ browser: createBrowserMock(pageMock) as unknown as Browser }),
createMock({ browser: createBrowserMock(browserContextMock) as unknown as Browser }),
).inject({
method: 'POST',
url: '/api/web_page/content',
Expand All @@ -40,17 +47,65 @@ await test('[/api/web_page/content] can extract content', async (t) => {
}),
);

// Make sure we cleared the cache.
assert.strictEqual(cdpSessionMock.send.mock.callCount(), 2);
assert.deepEqual(cdpSessionMock.send.mock.calls[0].arguments, ['Network.clearBrowserCache']);
assert.deepEqual(cdpSessionMock.send.mock.calls[1].arguments, ['Network.setCacheDisabled', { cacheDisabled: true }]);

// Make sure we set up a proxy URL to load resources bypassing CORS and CSP.
assert.strictEqual(pageMock.route.mock.callCount(), 1);
assert.deepEqual(pageMock.route.mock.calls[0].arguments[0], '**/proxy.secutils.dev/*');

// Make sure we loaded correct page.
assert.strictEqual(pageMock.goto.mock.callCount(), 1);
assert.deepEqual(pageMock.goto.mock.calls[0].arguments, [
'https://secutils.dev',
{ waitUntil: 'domcontentloaded', timeout: 5000 },
]);
assert.deepEqual(pageMock.goto.mock.calls[0].arguments, ['https://secutils.dev', { timeout: 5000 }]);

// Make sure we didn't wait for a selector since it wasn't specified.
assert.strictEqual(pageMock.waitForSelector.mock.callCount(), 0);
});

// Checks that the route handler registered for the proxy URL pattern fetches the URL-decoded
// target taken from the request path and fulfills the intercepted route with that response.
await test('[/api/web_page/content] can proxy requests', async () => {
  const page = createPageMock({
    window: createWindowMock(),
    responses: [],
    content: '<body><div>Hello Secutils.dev and world!</div><div>Hello World</div></body>',
  });
  const browser = createBrowserMock(createBrowserContextMock(page));

  const server = registerWebPageContentGetRoutes(createMock({ browser: browser as unknown as Browser }));
  const { statusCode } = await server.inject({
    method: 'POST',
    url: '/api/web_page/content',
    payload: { url: 'https://secutils.dev', delay: 0 },
  });
  assert.strictEqual(statusCode, 200);

  // Make sure exactly one route was registered, and that it targets the proxy URL pattern.
  assert.strictEqual(page.route.mock.callCount(), 1);
  const registeredRoute = page.route.mock.calls[0];
  assert.deepEqual(registeredRoute.arguments[0], '**/proxy.secutils.dev/*');

  // Drive the registered handler by hand with a fake route whose request URL carries the
  // percent-encoded target after the `/proxy.secutils.dev/` path prefix.
  const handleProxiedRoute = registeredRoute.arguments[1] as (route: unknown) => Promise<void>;
  const proxiedResponse = Symbol('response');
  const proxiedRequestUrl =
    'https://secutils.dev/proxy.secutils.dev/https%3A%2F%2Fsecutils-dev.github.io%2Fsecutils-sandbox%2Fmodule.js';
  const fakeRoute = {
    fetch: mock.fn(() => Promise.resolve(proxiedResponse)),
    request: () => ({ url: () => proxiedRequestUrl }),
    fulfill: mock.fn(),
  };
  await handleProxiedRoute(fakeRoute);

  // The handler must fetch the decoded target URL exactly once...
  assert.strictEqual(fakeRoute.fetch.mock.callCount(), 1);
  assert.deepEqual(fakeRoute.fetch.mock.calls[0].arguments, [
    { url: 'https://secutils-dev.github.io/secutils-sandbox/module.js' },
  ]);

  // ...and hand the fetched response back to the intercepted route.
  assert.strictEqual(fakeRoute.fulfill.mock.callCount(), 1);
  assert.deepEqual(fakeRoute.fulfill.mock.calls[0].arguments, [{ response: proxiedResponse }]);
});

await test('[/api/web_page/content] can inject content extractor', async (t) => {
t.mock.method(Date, 'now', () => 123000);

Expand All @@ -59,13 +114,13 @@ await test('[/api/web_page/content] can inject content extractor', async (t) =>
});

const windowMock = createWindowMock({ __secutils: { extractContent: extractContentMock } });

const pageMock = createPageMock({
window: windowMock,
responses: [],
});
const browserContextMock = createBrowserContextMock(pageMock);

const browserMock = createBrowserMock(pageMock);
const browserMock = createBrowserMock(browserContextMock);
const response = await registerWebPageContentGetRoutes(
createMock({ browser: browserMock as unknown as Browser }),
).inject({
Expand All @@ -92,15 +147,13 @@ await test('[/api/web_page/content] can inject content extractor', async (t) =>

// Make sure we loaded correct page.
assert.strictEqual(pageMock.goto.mock.callCount(), 1);
assert.deepEqual(pageMock.goto.mock.calls[0].arguments, [
'https://secutils.dev',
{ waitUntil: 'domcontentloaded', timeout: 5000 },
]);
assert.deepEqual(pageMock.goto.mock.calls[0].arguments, ['https://secutils.dev', { timeout: 5000 }]);

assert.strictEqual(browserMock.newPage.mock.callCount(), 1);
assert.deepEqual(browserMock.newPage.mock.calls[0].arguments, [
{ extraHTTPHeaders: { Cookie: 'my-cookie' }, bypassCSP: true },
assert.strictEqual(browserMock.newContext.mock.callCount(), 1);
assert.deepEqual(browserMock.newContext.mock.calls[0].arguments, [
{ extraHTTPHeaders: { Cookie: 'my-cookie' }, bypassCSP: false },
]);
assert.strictEqual(browserContextMock.newPage.mock.callCount(), 1);

// Make sure we didn't wait for a selector since it wasn't specified.
assert.strictEqual(pageMock.waitForSelector.mock.callCount(), 0);
Expand All @@ -122,9 +175,10 @@ await test('[/api/web_page/content] reports errors in content extractor', async
window: windowMock,
responses: [],
});
const browserContextMock = createBrowserContextMock(pageMock);

const response = await registerWebPageContentGetRoutes(
createMock({ browser: createBrowserMock(pageMock) as unknown as Browser }),
createMock({ browser: createBrowserMock(browserContextMock) as unknown as Browser }),
).inject({
method: 'POST',
url: '/api/web_page/content',
Expand Down
18 changes: 16 additions & 2 deletions src/api/web_page/content/get.ts
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,21 @@ async function getContent(
headers,
}: InputBodyParamsType,
): Promise<ApiResult<OutputBodyType>> {
const page = await browser.newPage({ extraHTTPHeaders: headers, bypassCSP: true });
const context = await browser.newContext({ extraHTTPHeaders: headers, bypassCSP: false });
const page = await context.newPage();

// Disable browser cache.
const cdpSession = await context.newCDPSession(page);
await cdpSession.send('Network.clearBrowserCache');
await cdpSession.send('Network.setCacheDisabled', { cacheDisabled: true });

// Set up a proxy URL to load resources bypassing CORS and CSP.
await page.route('**/proxy.secutils.dev/*', async (route) => {
const response = await route.fetch({
url: decodeURIComponent(new URL(route.request().url()).pathname.replace('/proxy.secutils.dev/', '')),
});
await route.fulfill({ response });
});

// Inject custom scripts if any.
if (scripts?.extractContent) {
Expand Down Expand Up @@ -206,7 +220,7 @@ async function getContent(
log.debug(`Fetching content for "${url}" (timeout: ${timeout}ms).`);
let response: Response | null;
try {
response = await page.goto(url, { waitUntil: 'domcontentloaded', timeout });
response = await page.goto(url, { timeout });
log.debug(`Page "${url}" is loaded.`);
} catch (err) {
const errorMessage = `Failed to load page "${url}": ${Diagnostics.errorMessage(err)}`;
Expand Down
40 changes: 25 additions & 15 deletions src/api/web_page/resources/list.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@ import type { Browser } from 'playwright/index.js';

import type { WebPageResourceWithRawData } from './list.js';
import { registerWebPageResourcesListRoutes } from './list.js';
import { createBrowserMock, createPageMock, createResponseMock, createWindowMock } from '../../../mocks.js';
import {
createBrowserContextMock,
createBrowserMock,
createCDPSessionMock,
createPageMock,
createResponseMock,
createWindowMock,
} from '../../../mocks.js';
import { createMock } from '../../api_route_params.mocks.js';

await test('[/api/web_page/resources] can successfully create route', () => {
Expand Down Expand Up @@ -79,9 +86,11 @@ await test('[/api/web_page/resources] can parse resources', async (t) => {
}),
],
});
const cdpSessionMock = createCDPSessionMock();
const browserContextMock = createBrowserContextMock(pageMock, cdpSessionMock);

const response = await registerWebPageResourcesListRoutes(
createMock({ browser: createBrowserMock(pageMock) as unknown as Browser }),
createMock({ browser: createBrowserMock(browserContextMock) as unknown as Browser }),
).inject({
method: 'POST',
url: '/api/web_page/resources',
Expand Down Expand Up @@ -194,12 +203,14 @@ await test('[/api/web_page/resources] can parse resources', async (t) => {
}),
);

// Make sure we cleared the cache.
assert.strictEqual(cdpSessionMock.send.mock.callCount(), 2);
assert.deepEqual(cdpSessionMock.send.mock.calls[0].arguments, ['Network.clearBrowserCache']);
assert.deepEqual(cdpSessionMock.send.mock.calls[1].arguments, ['Network.setCacheDisabled', { cacheDisabled: true }]);

// Make sure we loaded correct page.
assert.strictEqual(pageMock.goto.mock.callCount(), 1);
assert.deepEqual(pageMock.goto.mock.calls[0].arguments, [
'https://secutils.dev',
{ waitUntil: 'domcontentloaded', timeout: 5000 },
]);
assert.deepEqual(pageMock.goto.mock.calls[0].arguments, ['https://secutils.dev', { timeout: 5000 }]);

// Make sure we didn't wait for a selector since it wasn't specified.
assert.strictEqual(pageMock.waitForSelector.mock.callCount(), 0);
Expand Down Expand Up @@ -247,8 +258,9 @@ await test('[/api/web_page/resources] can inject resource filters', async (t) =>
}),
],
});
const browserContextMock = createBrowserContextMock(pageMock);

const browserMock = createBrowserMock(pageMock);
const browserMock = createBrowserMock(browserContextMock);
const response = await registerWebPageResourcesListRoutes(
createMock({ browser: browserMock as unknown as Browser }),
).inject({
Expand Down Expand Up @@ -292,14 +304,11 @@ await test('[/api/web_page/resources] can inject resource filters', async (t) =>

// Make sure we loaded correct page.
assert.strictEqual(pageMock.goto.mock.callCount(), 1);
assert.deepEqual(pageMock.goto.mock.calls[0].arguments, [
'https://secutils.dev',
{ waitUntil: 'domcontentloaded', timeout: 5000 },
]);
assert.deepEqual(pageMock.goto.mock.calls[0].arguments, ['https://secutils.dev', { timeout: 5000 }]);

assert.strictEqual(browserMock.newPage.mock.callCount(), 1);
assert.deepEqual(browserMock.newPage.mock.calls[0].arguments, [
{ extraHTTPHeaders: { Cookie: 'my-cookie' }, bypassCSP: true },
assert.strictEqual(browserMock.newContext.mock.callCount(), 1);
assert.deepEqual(browserMock.newContext.mock.calls[0].arguments, [
{ extraHTTPHeaders: { Cookie: 'my-cookie' }, bypassCSP: false },
]);

// Make sure we didn't wait for a selector since it wasn't specified.
Expand Down Expand Up @@ -355,9 +364,10 @@ await test('[/api/web_page/resources] reports errors in resource filters', async
window: windowMock,
responses: [],
});
const browserContextMock = createBrowserContextMock(pageMock);

const response = await registerWebPageResourcesListRoutes(
createMock({ browser: createBrowserMock(pageMock) as unknown as Browser }),
createMock({ browser: createBrowserMock(browserContextMock) as unknown as Browser }),
).inject({
method: 'POST',
url: '/api/web_page/resources',
Expand Down
10 changes: 8 additions & 2 deletions src/api/web_page/resources/list.ts
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,13 @@ async function getResourcesList(
log: FastifyBaseLogger,
{ url, waitSelector, timeout = DEFAULT_TIMEOUT_MS, delay = DEFAULT_DELAY_MS, scripts, headers }: InputBodyParamsType,
): Promise<ApiResult<OutputBodyType>> {
const page = await browser.newPage({ extraHTTPHeaders: headers, bypassCSP: true });
const context = await browser.newContext({ extraHTTPHeaders: headers, bypassCSP: false });
const page = await context.newPage();

// Disable browser cache.
const cdpSession = await context.newCDPSession(page);
await cdpSession.send('Network.clearBrowserCache');
await cdpSession.send('Network.setCacheDisabled', { cacheDisabled: true });

// Inject custom scripts if any.
if (scripts?.resourceFilterMap) {
Expand Down Expand Up @@ -207,7 +213,7 @@ async function getResourcesList(

log.debug(`Fetching resources for "${url}" (timeout: ${timeout}ms).`);
try {
await page.goto(url, { waitUntil: 'domcontentloaded', timeout });
await page.goto(url, { timeout });
log.debug(`Page "${url}" is loaded.`);
} catch (err) {
const errorMessage = `Failed to load page "${url}": ${Diagnostics.errorMessage(err)}`;
Expand Down
22 changes: 20 additions & 2 deletions src/mocks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,27 @@ import { mock } from 'node:test';

import type { SecutilsWindow } from './api/web_page/index.js';

export function createBrowserMock(pageMock?: ReturnType<typeof createPageMock>) {
export function createBrowserMock(browserContextMock?: BrowserContextMock) {
return {
isConnected: mock.fn(() => false),
newPage: mock.fn(() => pageMock ?? createPageMock()),
newContext: mock.fn(() => Promise.resolve(browserContextMock ?? createBrowserContextMock())),
};
}

export type BrowserContextMock = ReturnType<typeof createBrowserContextMock>;
/**
 * Builds a browser context test double exposing mocked `newCDPSession` and `newPage` methods.
 *
 * @param pageMock Page mock that `newPage` resolves with; when omitted, a fresh page mock is
 * created on every `newPage` call.
 * @param cdpSessionMock CDP session mock that `newCDPSession` resolves with; when omitted, a
 * fresh session mock is created on every `newCDPSession` call.
 * @returns Object whose `newCDPSession` and `newPage` are `mock.fn` instances returning promises.
 */
export function createBrowserContextMock(
  pageMock?: ReturnType<typeof createPageMock>,
  cdpSessionMock?: ReturnType<typeof createCDPSessionMock>,
) {
  // Fallback mocks are resolved lazily, at call time, not when the context mock is built.
  const newCDPSession = mock.fn(() => {
    const session = cdpSessionMock ?? createCDPSessionMock();
    return Promise.resolve(session);
  });
  const newPage = mock.fn(() => {
    const page = pageMock ?? createPageMock();
    return Promise.resolve(page);
  });

  return { newCDPSession, newPage };
}

/**
 * Builds a CDP session test double.
 *
 * @returns Object with a single mocked `send` method that records its calls and returns a
 * promise resolved with no value.
 */
export function createCDPSessionMock() {
  const send = mock.fn(() => Promise.resolve());
  return { send };
}

Expand All @@ -28,6 +45,7 @@ export function createPageMock({ window = createWindowMock(), responses = [], co
content: mock.fn(() => Promise.resolve(content)),
addInitScript: mock.fn(),
waitForSelector: mock.fn(),
route: mock.fn(),
evaluateHandle: mock.fn(() => window),
evaluate: mock.fn((fn: (args: unknown) => Promise<unknown>, args: unknown) => fn(args)),
};
Expand Down

0 comments on commit 6825861

Please sign in to comment.