import * as React from 'react'
  /* @jsx mdx */
import { mdx } from '@mdx-js/react';
/* @jsxRuntime classic */

/* @jsx mdx */

// Page metadata exported next to the rendered content (title, description, social image path).
export const _frontmatter = {
  title: "Serverless Chrome puppeteer",
  description: "Build browser automations with Chrome Puppeteer and AWS Lambda. Take screenshots, test websites, scrape content. Anything a browser can do ✌️",
  image: "./img/serverless-chrome-puppeteer.png",
};

// Props spread onto the layout element on every render of MDXContent.
const layoutProps = { _frontmatter: _frontmatter };

// Element type rendered as <MDXLayout> around the whole document.
const MDXLayout = "wrapper";
export default function MDXContent({
  components,
  ...props
}) {
  return <MDXLayout {...layoutProps} {...props} components={components} mdxType="MDXLayout">
    <h1 {...{
      "id": "serverless-chrome-puppeteer"
    }}>{`Serverless Chrome puppeteer`}</h1>
    <p><img parentName="p" {...{
        "src": "/e1549ac47d0663145f95e0e941eeac34/serverless-chrome-puppeteer.svg",
        "alt": null
      }}></img></p>
    <p>{`Say you want to build a scraper, automate manual testing, or generate custom social cards for your website. What do you do?`}</p>
    <p>{`You could spin up a docker container, set up headless Chrome, add Puppeteer, write a script to run it all, add a server to create an API, and ...`}</p>
    <p>{`Or you can set up Serverless Chrome with AWS Lambda. Write a bit of code, hit deploy, and get a Chrome browser running on demand.`}</p>
    <p>{`That's what this chapter is about 🤘`}</p>
    <p>{`You'll learn how to:`}</p>
    <ul>
      <li parentName="ul">{`configure Chrome Puppeteer on AWS`}</li>
      <li parentName="ul">{`build a basic scraper`}</li>
      <li parentName="ul">{`take website screenshots`}</li>
      <li parentName="ul">{`run it on-demand`}</li>
    </ul>
    <p>{`We build a scraper that goes to `}<a parentName="p" {...{
        "href": "https://google.com"
      }}>{`google.com`}</a>{`, types in a phrase, and returns the first page of results. Then reuse the same code to return a screenshot.`}</p>
    <p>{`You can see `}<a parentName="p" {...{
        "href": "https://github.com/Swizec/serverlesshandbook.dev/tree/master/examples/serverless-chrome-example"
      }}>{`full code on GitHub`}</a></p>
    <h2 {...{
      "id": "serverless-chrome"
    }}>{`Serverless Chrome`}</h2>
    <p>{`Chrome's engine ships as the open source Chromium browser. Other browsers use it and add their own UI and custom features.`}</p>
    <p>{`You can use the engine for browser automation – scraping, testing, screenshots, etc. When you need to render a website, Chromium is your friend.`}</p>
    <p>{`This means:`}</p>
    <ul>
      <li parentName="ul">{`download a chrome binary`}</li>
      <li parentName="ul">{`set up an environment that makes it happy`}</li>
      <li parentName="ul">{`run in headless mode`}</li>
      <li parentName="ul">{`configure processes that talk to each other via complex sockets`}</li>
    </ul>
    <p><video parentName="p" {...{
        "style": {
          "margin": "auto auto",
          "display": "block",
          "maxWidth": "80%"
        },
        "autoPlay": true,
        "loop": true,
        "muted": true,
        "playsInline": true,
        "loading": "lazy"
      }}>{`
            `}<source parentName="video" {...{
          "src": "https://media2.giphy.com/media/fXK6nNrqoW9NNWWcPD/giphy-loop.mp4?cid=4ac046a21gu1h472y41gfkkebb5xgqcyv2ljqg57em8u2nrh&rid=giphy-loop.mp4&ct=g",
          "type": "video/mp4"
        }}></source>{`
        `}</video></p>
    <p>{`Others have solved this problem for you.`}</p>
    <p>{`Rather than figure it out yourself, I recommend using `}<a parentName="p" {...{
        "href": "https://github.com/alixaxel/chrome-aws-lambda"
      }}>{`chrome-aws-lambda`}</a>{`. It's the most up-to-date package for running Serverless Chrome.`}</p>
    <p>{`Here's what you need for a Serverless Chrome setup:`}</p>
    <ol>
      <li parentName="ol"><strong parentName="li">{`install dependencies`}</strong></li>
    </ol>
    <pre><code parentName="pre" {...{}}>{`$ yarn add chrome-aws-lambda@3.1.1 puppeteer@3.1.0 @types/puppeteer puppeteer-core@3.1.0
`}</code></pre>
    <p>{`This installs everything you need to both run and interact with Chrome. ✌️`}</p>
    <p>{`Check `}<a parentName="p" {...{
        "href": "https://github.com/alixaxel/chrome-aws-lambda#versioning"
      }}>{`chrome-aws-lambda/README`}</a>{` for the latest version of Chrome Puppeteer you can use. Make sure they match.`}</p>
    <ol {...{
      "start": 2
    }}>
      <li parentName="ol"><strong parentName="li">{`configure serverless.yml`}</strong></li>
    </ol>
    <pre><code parentName="pre" {...{
        "className": "language-yaml"
      }}>{`# serverless.yml

service: serverless-chrome-example

provider:
  name: aws
  runtime: nodejs12.x
  stage: dev

package:
  exclude:
    - node_modules/puppeteer/.local-chromium/**
`}</code></pre>
    <p>{`Configure a new service, make it run on AWS, use latest node.`}</p>
    <p>{`The `}<inlineCode parentName="p">{`package`}</inlineCode>{` part is important. It tells Serverless `}<em parentName="p">{`not`}</em>{` to package the chromium binary with your code. AWS rejects builds of that size.`}</p>
    <p>{`You are now ready to start running Chrome ✌️`}</p>
    <div id="lock" />
    <h3 {...{
      "id": "chrome-puppeteer-101"
    }}>{`Chrome Puppeteer 101`}</h3>
    <p><a parentName="p" {...{
        "href": "https://pptr.dev/"
      }}>{`Chrome Puppeteer`}</a>{` is a set of tools to interact with Chrome programmatically.`}</p>
    <blockquote>
      <p parentName="blockquote">{`Puppeteer is a Node library which provides a high-level API to control Chrome or Chromium over the DevTools Protocol. Puppeteer runs headless by default, but can be configured to run full (non-headless) Chrome or Chromium.`}</p>
    </blockquote>
    <p>{`Write code that interacts with a website like a person would. Anything a person can do on the web, you can do with Puppeteer.`}</p>
    <p>{`Core syntax feels like jQuery, but the objects are different than what you're used to. I've found it's best not to worry about the details.`}</p>
    <p>{`Here's how you click on a link:`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-javascript"
      }}>{`const page = await browser.newPage() // open a "tab"
await page.goto("https://example.com") // navigates to URL

const div = await page.$("div#some_content") // grab a div
const link = await div.$("a.target_link") // find the link inside the div
await link.click() // clicks link
`}</code></pre>
    <p>{`Always open a new page for every new browser context.`}</p>
    <p>{`Navigate to your URL then use jQuery-like selectors to interact with the page. You can feed selectors into `}<inlineCode parentName="p">{`click()`}</inlineCode>{` and other methods, or use the `}<inlineCode parentName="p">{`page.$`}</inlineCode>{` syntax to search around.`}</p>
    <h2 {...{
      "id": "build-a-scraper"
    }}>{`Build a scraper`}</h2>
    <p>{`Web scraping is fiddly but sounds simple in theory:`}</p>
    <ul>
      <li parentName="ul">{`load website`}</li>
      <li parentName="ul">{`find content`}</li>
      <li parentName="ul">{`read content`}</li>
      <li parentName="ul">{`return content in new format`}</li>
    </ul>
    <p>{`But that doesn't generalize. Each website is different.`}</p>
    <p>{`You adapt the core technique to each website you scrape and there's no telling when the HTML might change.`}</p>
    <p>{`You might even find websites that actively fight against scraping. Block bots, limit access speed, obfuscate HTML, ...`}</p>
    <p><em parentName="p">{`Please play nice and don't unleash thousands of parallel requests onto unsuspecting websites.`}</em></p>
    <p>{`You can watch me work on this project on YouTube, if you prefer video:`}</p>
    <lite-youtube {...{
      "videoid": "wRJTxahPIi4",
      "videostartat": "0"
    }}></lite-youtube>
    <p>{`And you can try the final result here: `}<a parentName="p" {...{
        "href": "https://4tydwq78d9.execute-api.us-east-1.amazonaws.com/dev/scraper"
      }}>{`https://4tydwq78d9.execute-api.us-east-1.amazonaws.com/dev/scraper`}</a></p>
    <h3 {...{
      "id": "1-more-dependencies"
    }}>{`1. more dependencies`}</h3>
    <p>{`Start with the `}<inlineCode parentName="p">{`serverless.yml`}</inlineCode>{` and dependencies from earlier (chrome-aws-lambda and puppeteer).`}</p>
    <p>{`Add `}<inlineCode parentName="p">{`aws-lambda`}</inlineCode>{`:`}</p>
    <pre><code parentName="pre" {...{}}>{`$ yarn add aws-lambda @types/aws-lambda
`}</code></pre>
    <p>{`Installs the code you need to interact with the AWS Lambda environment.`}</p>
    <h3 {...{
      "id": "2-add-a-scraper-function"
    }}>{`2. add a scraper function`}</h3>
    <p>{`Define a new scraper function in `}<inlineCode parentName="p">{`serverless.yml`}</inlineCode></p>
    <pre><code parentName="pre" {...{
        "className": "language-yaml"
      }}>{`# serverless.yml

functions:
  scraper:
    handler: dist/scraper.handler
    memorySize: 2536
    timeout: 30
    events:
      - http:
          path: scraper
          method: GET
          cors: true
`}</code></pre>
    <p>{`We're saying code lives in the `}<inlineCode parentName="p">{`handler`}</inlineCode>{` method exported from `}<inlineCode parentName="p">{`scraper`}</inlineCode>{`. We ask for lots of memory and a long timeout. Chrome is resource intensive and our code makes web requests, which might take a while.`}</p>
    <p>{`All this fires from a GET request on `}<inlineCode parentName="p">{`/scraper`}</inlineCode>{`.`}</p>
    <h3 {...{
      "id": "3-getchrome"
    }}>{`3. getChrome()`}</h3>
    <p>{`The `}<inlineCode parentName="p">{`getChrome`}</inlineCode>{` method instantiates a new browser context. I like to put this in a `}<inlineCode parentName="p">{`util`}</inlineCode>{` file.`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-typescript"
      }}>{`// src/util.ts

import chrome from "chrome-aws-lambda"

export async function getChrome() {
  let browser = null

  try {
    browser = await chrome.puppeteer.launch({
      args: chrome.args,
      defaultViewport: {
        width: 1920,
        height: 1080,
        isMobile: true,
        deviceScaleFactor: 2,
      },
      executablePath: await chrome.executablePath,
      headless: chrome.headless,
      ignoreHTTPSErrors: true,
    })
  } catch (err) {
    console.error("Error launching chrome")
    console.error(err)
  }

  return browser
}
`}</code></pre>
    <p>{`We launch a Chrome Puppeteer instance with default config and specify our own screen size.`}</p>
    <p>{`The `}<inlineCode parentName="p">{`isMobile`}</inlineCode>{` setting tricks many websites into loading faster. The `}<inlineCode parentName="p">{`deviceScaleFactor: 2`}</inlineCode>{` helps create better screenshots. It's like using a retina screen.`}</p>
    <p>{`Adding `}<inlineCode parentName="p">{`ignoreHTTPSErrors`}</inlineCode>{` makes the process more robust.`}</p>
    <p>{`If the browser fails to launch, we log debugging info.`}</p>
    <h3 {...{
      "id": "4-a-shared-createhandler"
    }}>{`4. a shared createHandler()`}</h3>
    <p>{`We're building 2 pieces of code that share a lot of logic – scraping and screenshots. Both need a browser, deal with errors, and parse URL queries.`}</p>
    <p>{`We build a common `}<inlineCode parentName="p">{`createHandler()`}</inlineCode>{` method that deals with boilerplate and calls the important function when ready.`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-typescript"
      }}>{`// src/util.ts

import { APIGatewayEvent } from "aws-lambda"
import { Browser } from "puppeteer"

// both scraper and screenshot have the same basic handler
// they just call a different method to do things
export const createHandler = (
  workFunction: (browser: Browser, search: string) => Promise<APIResponse>
) => async (event: APIGatewayEvent): Promise<APIResponse> => {
  const search =
    event.queryStringParameters && event.queryStringParameters.search

  if (!search) {
    return {
      statusCode: 400,
      body: "Please provide a ?search= parameter",
    }
  }

  const browser = await getChrome()

  if (!browser) {
    return {
      statusCode: 500,
      body: "Error launching Chrome",
    }
  }

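  try {
    // call the function that does the real work
    const response = await workFunction(browser, search)

    return response
  } catch (err) {
    console.log(err)
    return {
      statusCode: 500,
      body: "Error scraping Google",
    }
  } finally {
    // always close Chrome so warm Lambda containers don't pile up browser processes
    await browser.close()
  }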
}
`}</code></pre>
    <p>{`We read the `}<inlineCode parentName="p">{`?search=`}</inlineCode>{` param, open a browser, and verify everything's set up.`}</p>
    <p>{`Then we call the passed-in `}<inlineCode parentName="p">{`workFunction`}</inlineCode>{`, which returns a response. If that fails, we log the error and return a 500 response.`}</p>
    <h3 {...{
      "id": "5-scrapegoogle"
    }}>{`5. scrapeGoogle()`}</h3>
    <p>{`We're ready to scrape Google search results.`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-typescript"
      }}>{`async function scrapeGoogle(browser: Browser, search: string) {
  const page = await browser.newPage()
  await page.goto("https://google.com", {
    waitUntil: ["domcontentloaded", "networkidle2"],
  })

  // this part is specific to the page you're scraping
  await page.type("input[type=text]", search)

  const [response] = await Promise.all([
    page.waitForNavigation(),
    page.click("input[type=submit]"),
  ])

  if (!response.ok()) {
    throw "Couldn't get response"
  }

  await page.goto(response.url())

  // this part is very specific to the page you're scraping
  const searchResults = await page.$$(".rc")

  let links = await Promise.all(
    searchResults.map(async (result) => {
      return {
        url: await result.$eval("a", (node) => node.getAttribute("href")),
        title: await result.$eval("h3", (node) => node.innerHTML),
        description: await result.$eval("span.st", (node) => node.innerHTML),
      }
    })
  )

  return {
    statusCode: 200,
    body: JSON.stringify(links),
  }
}

export const handler = createHandler(scrapeGoogle)
`}</code></pre>
    <p>{`Lots going on here. Let's go piece by piece.`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-typescript"
      }}>{`const page = await browser.newPage()
await page.goto("https://google.com", {
  waitUntil: ["domcontentloaded", "networkidle2"],
})
`}</code></pre>
    <p>{`Open a new page, navigate to google.com, wait for everything to load. I recommend waiting for `}<inlineCode parentName="p">{`networkidle2`}</inlineCode>{`, which means there have been no more than 2 open network connections for at least 500ms – a good sign that asynchronous requests have finished.`}</p>
    <p>{`Useful when dealing with complex webapps.`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-typescript"
      }}>{`// this part is specific to the page you're scraping
await page.type("input[type=text]", search)

const [response] = await Promise.all([
  page.waitForNavigation(),
  page.click("input[type=submit]"),
])

if (!response.ok()) {
  throw "Couldn't get response"
}

await page.goto(response.url())
`}</code></pre>
    <p>{`To scrape google, we type a search into the input field, then hit submit and wait for the page to load.`}</p>
    <p>{`This part is different for every website.`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-typescript"
      }}>{`// this part is very specific to the page you're scraping
const searchResults = await page.$$(".rc")

let links = await Promise.all(
  searchResults.map(async (result) => {
    return {
      url: await result.$eval("a", (node) => node.getAttribute("href")),
      title: await result.$eval("h3", (node) => node.innerHTML),
      description: await result.$eval("span.st", (node) => node.innerHTML),
    }
  })
)

return {
  statusCode: 200,
  body: JSON.stringify(links),
}
`}</code></pre>
    <p>{`When the results page loads, we:`}</p>
    <ul>
      <li parentName="ul">{`look for every `}<inlineCode parentName="li">{`.rc`}</inlineCode>{` DOM element – best identifier of search results I could find`}</li>
      <li parentName="ul">{`iterate through results`}</li>
      <li parentName="ul">{`get the info we want from each`}</li>
    </ul>
    <p>{`You can use the `}<inlineCode parentName="p">{`page.$eval`}</inlineCode>{` trick to parse DOM nodes with the same API you'd use in a browser. Executes your method on the nodes it finds and returns the result.`}</p>
    <h3 {...{
      "id": "6-hit-deploy-and-try-it-out"
    }}>{`6. hit deploy and try it out`}</h3>
    <p>{`You now have a bona fide web scraper. Wakes up on demand, runs chrome, turns Google search results into easy-to-use JSON.`}</p>
    <div><div parentName="div" {...{
        "className": "static-tweet-embed"
      }}>{`
        `}<a parentName="div" {...{
          "className": "author",
          "href": "https://t.co/VuU1lFnIe7"
        }}><img parentName="a" {...{
            "src": "https://pbs.twimg.com/profile_images/1423736293385662466/AnF0Fsi6_normal.jpg",
            "loading": "lazy",
            "alt": "Swizec Teller speaking at Reactathon avatar"
          }}></img><b parentName="a">{`Swizec Teller speaking at Reactathon`}</b>{`@Swizec`}</a>{`
        `}<blockquote parentName="div">{`this was fun, got a lambda that spits out JSON of the first page of google results`}<br parentName="blockquote"></br><br parentName="blockquote"></br>{`Here's `}<a parentName="blockquote" {...{
            "href": "https://twitter.com/hashtag/javascript"
          }}>{`#javascript`}</a>{` for example`}<br parentName="blockquote"></br><br parentName="blockquote"></br>{`couldn't quite get the screenshot version to work yet `}</blockquote>{`
        `}<div parentName="div" {...{
          "className": "media"
        }}><img parentName="div" {...{
            "src": "https://pbs.twimg.com/media/Ecwqgy3UMAAz-Pq.png",
            "width": "100%",
            "loading": "lazy",
            "alt": "Tweet media"
          }}></img></div>{`
        `}<div parentName="div" {...{
          "className": "time"
        }}><a parentName="div" {...{
            "href": "https://twitter.com/Swizec/status/1282446868950085632"
          }}>{`10:48:54 PM – 7/12/2020`}</a></div>{`
        `}<div parentName="div" {...{
          "className": "stats"
        }}><a parentName="div" {...{
            "href": "https://twitter.com/intent/like?tweet_id=1282446868950085632",
            "className": "like"
          }}><svg parentName="a" {...{
              "viewBox": "0 0 24 24",
              "className": "r-m0bqgq r-4qtqp9 r-yyyyoo r-1xvli5t r-dnmrzs r-bnwqim r-1plcrui r-lrvibr",
              "style": {}
            }}><g parentName="svg"><path parentName="g" {...{
                  "d": "M12 21.638h-.014C9.403 21.59 1.95 14.856 1.95 8.478c0-3.064 2.525-5.754 5.403-5.754 2.29 0 3.83 1.58 4.646 2.73.814-1.148 2.354-2.73 4.645-2.73 2.88 0 5.404 2.69 5.404 5.755 0 6.376-7.454 13.11-10.037 13.157H12zM7.354 4.225c-2.08 0-3.903 1.988-3.903 4.255 0 5.74 7.034 11.596 8.55 11.658 1.518-.062 8.55-5.917 8.55-11.658 0-2.267-1.823-4.255-3.903-4.255-2.528 0-3.94 2.936-3.952 2.965-.23.562-1.156.562-1.387 0-.014-.03-1.425-2.965-3.954-2.965z"
                }}></path></g></svg>{`0`}</a>{` `}<a parentName="div" {...{
            "href": "https://twitter.com/Swizec/status/1282446868950085632",
            "className": "reply"
          }}><svg parentName="a" {...{
              "viewBox": "0 0 24 24",
              "className": "r-m0bqgq r-4qtqp9 r-yyyyoo r-1xvli5t r-dnmrzs r-bnwqim r-1plcrui r-lrvibr"
            }}><g parentName="svg"><path parentName="g" {...{
                  "d": "M14.046 2.242l-4.148-.01h-.002c-4.374 0-7.8 3.427-7.8 7.802 0 4.098 3.186 7.206 7.465 7.37v3.828c0 .108.044.286.12.403.142.225.384.347.632.347.138 0 .277-.038.402-.118.264-.168 6.473-4.14 8.088-5.506 1.902-1.61 3.04-3.97 3.043-6.312v-.017c-.006-4.367-3.43-7.787-7.8-7.788zm3.787 12.972c-1.134.96-4.862 3.405-6.772 4.643V16.67c0-.414-.335-.75-.75-.75h-.396c-3.66 0-6.318-2.476-6.318-5.886 0-3.534 2.768-6.302 6.3-6.302l4.147.01h.002c3.532 0 6.3 2.766 6.302 6.296-.003 1.91-.942 3.844-2.514 5.176z"
                }}></path></g></svg>{`0`}</a></div>{`
    `}</div></div>
    <p>{`We left out project configuration boilerplate. You can find those details in other chapters or `}<a parentName="p" {...{
        "href": "https://github.com/Swizec/serverlesshandbook.dev/tree/master/examples/serverless-chrome-example"
      }}>{`see example code on GitHub`}</a>{`.`}</p>
    <h2 {...{
      "id": "take-screenshots"
    }}>{`Take screenshots`}</h2>
    <p>{`Taking screenshots is similar to scraping. Instead of parsing the page, you call `}<inlineCode parentName="p">{`.screenshot()`}</inlineCode>{` and get an image.`}</p>
    <p>{`Our example returns that image directly. You'll want to store it on S3 and return a URL in a real project. Lambda isn't a great fit for large files.`}</p>
    <h3 {...{
      "id": "1-tell-api-gateway-to-serve-binary"
    }}>{`1. tell API Gateway to serve binary`}</h3>
    <p>{`First, we tell API Gateway that it's okay to serve binary data.`}</p>
    <p>{`I don't recommend this in production unless you have a great reason. Like a dynamic image that changes every time.`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-yaml"
      }}>{`# serverless.yml

provider:
  name: aws
  runtime: nodejs12.x
  stage: dev
  apiGateway:
    binaryMediaTypes:
      - "*/*"
`}</code></pre>
    <p>{`You can limit `}<inlineCode parentName="p">{`binaryMediaTypes`}</inlineCode>{` to specific types you intend to use. `}<inlineCode parentName="p">{`*/*`}</inlineCode>{` is easier.`}</p>
    <h3 {...{
      "id": "2-add-a-new-function"
    }}>{`2. add a new function`}</h3>
    <p>{`Next we define a new Lambda function`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-yaml"
      }}>{`# serverless.yml

functions:
  screenshot:
    handler: dist/screenshot.handler
    memorySize: 2536
    timeout: 30
    events:
      - http:
          path: screenshot
          method: GET
          cors: true
`}</code></pre>
    <p>{`Same as before, different name. Needs lots of memory and a long timeout.`}</p>
    <h3 {...{
      "id": "3-screenshotgoogle"
    }}>{`3. screenshotGoogle()`}</h3>
    <p>{`We're using similar machinery as before.`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-typescript"
      }}>{`// src/screenshot.ts

async function screenshotGoogle(browser: Browser, search: string) {
  const page = await browser.newPage()
  await page.goto("https://google.com", {
    waitUntil: ["domcontentloaded", "networkidle2"],
  })

  // this part is specific to the page you're screenshotting
  await page.type("input[type=text]", search)

  const [response] = await Promise.all([
    page.waitForNavigation(),
    page.click("input[type=submit]"),
  ])

  if (!response.ok()) {
    throw "Couldn't get response"
  }

  await page.goto(response.url())

  // this part is specific to the page you're screenshotting
  const element = await page.$("#main")

  if (!element) {
    throw "Couldn't find results div"
  }

  const boundingBox = await element.boundingBox()
  const imagePath = \`/tmp/screenshot-\${new Date().getTime()}.png\`

  if (!boundingBox) {
    throw "Couldn't measure size of results div"
  }

  await page.screenshot({
    path: imagePath,
    clip: boundingBox,
  })

  const data = fs.readFileSync(imagePath).toString("base64")

  return {
    statusCode: 200,
    headers: {
      "Content-Type": "image/png",
    },
    body: data,
    isBase64Encoded: true,
  }
}

export const handler = createHandler(screenshotGoogle)
`}</code></pre>
    <p>{`Same code up to when we load the results page. Type a query, hit submit, wait for reload.`}</p>
    <p>{`Then we do something different – measure the size of our results div.`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-typescript"
      }}>{`// this part is specific to the page you're screenshotting
const element = await page.$("#main")

if (!element) {
  throw "Couldn't find results div"
}

const boundingBox = await element.boundingBox()
const imagePath = \`/tmp/screenshot-\${new Date().getTime()}.png\`

if (!boundingBox) {
  throw "Couldn't measure size of results div"
}
`}</code></pre>
    <p>{`We look for results and grab their `}<inlineCode parentName="p">{`boundingBox()`}</inlineCode>{`. That tells us the `}<inlineCode parentName="p">{`x, y`}</inlineCode>{` coordinates and the `}<inlineCode parentName="p">{`width, height`}</inlineCode>{` size for a more focused screenshot.`}</p>
    <p>{`We set up an `}<inlineCode parentName="p">{`imagePath`}</inlineCode>{` in `}<inlineCode parentName="p">{`/tmp`}</inlineCode>{`. We can write to a file on Lambda's hard drive, `}<em parentName="p">{`but it will not stay there.`}</em>{` When our lambda turns off, the file is gone.`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-typescript"
      }}>{`await page.screenshot({
  path: imagePath,
  clip: boundingBox,
})
`}</code></pre>
    <p>{`Take a screenshot with `}<inlineCode parentName="p">{`page.screenshot()`}</inlineCode>{`. Saves to a file.`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-typescript"
      }}>{`const data = fs.readFileSync(imagePath).toString("base64")

return {
  statusCode: 200,
  headers: {
    "Content-Type": "image/png",
  },
  body: data,
  isBase64Encoded: true,
}
`}</code></pre>
    <p>{`Read the file into a Base64-encoded string and return a response. It must contain a content type – `}<inlineCode parentName="p">{`image/png`}</inlineCode>{` in our case – and tell API Gateway that it's Base64-encoded.`}</p>
    <p>{`This is where you'd upload to S3 in production.`}</p>
    <p>{`You can try mine here: `}<a parentName="p" {...{
        "href": "https://4tydwq78d9.execute-api.us-east-1.amazonaws.com/dev/screenshot"
      }}>{`https://4tydwq78d9.execute-api.us-east-1.amazonaws.com/dev/screenshot`}</a></p>
    <h2 {...{
      "id": "how-to-use-this"
    }}>{`How to use this`}</h2>
    <p>{`The most common use cases for Chrome Puppeteer are:`}</p>
    <ol>
      <li parentName="ol">{`Running automated tests`}</li>
      <li parentName="ol">{`Scraping websites cheaply`}</li>
      <li parentName="ol">{`Generating dynamic HTML-to-PNG images`}</li>
      <li parentName="ol">{`Generating PDFs`}</li>
    </ol>
    <p>{`3 and 4 are great because you can build a small website that renders a social card for your content and use this machinery to turn it into an image.`}</p>
    <p>{`Same for PDFs – build dynamic website, print-to-PDF with Chrome. Easier than generating PDFs by hand.`}</p>
    <p>{`Have fun 😊`}</p>
    <p>{`Next chapter we look at handling secrets.`}</p>

    </MDXLayout>;
}
;
// Tag the exported component with an `isMDXComponent` flag.
// NOTE(review): presumably read by MDX tooling to recognize compiled MDX content — confirm against @mdx-js/react.
MDXContent.isMDXComponent = true;
      