Skip to content

Client

urlscan.Client

Bases: BaseClient

Main client for urlscan.io API.

Source code in src/urlscan/client.py
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
class Client(BaseClient):
    """Main client for urlscan.io API."""

    def get_result(self, uuid: str) -> dict:
        """Get a result of a scan by UUID.

        Args:
            uuid (str): UUID.

        Returns:
            dict: Scan result.

        Reference:
            https://urlscan.io/docs/api/#result

        """
        return self.get_json(f"/api/v1/result/{uuid}/")

    def get_screenshot(self, uuid: str) -> BytesIO:
        """Get a screenshot of a scan by UUID.

        Args:
            uuid (str): UUID.

        Returns:
            BytesIO: Screenshot (img/png) as an in-memory binary stream. Its
                ``name`` attribute is set to the basename reported by the response.

        Reference:
            https://urlscan.io/docs/api/#screenshot

        """
        res = self._get(f"/screenshots/{uuid}.png")
        bio = BytesIO(res.content)
        # Attach a file name so the stream can be handed to APIs that expect one.
        bio.name = res.basename
        return bio

    def get_dom(self, uuid: str) -> str:
        """Get a DOM of a scan by UUID.

        Args:
            uuid (str): UUID.

        Returns:
            str: DOM as a string.

        Reference:
            https://urlscan.io/docs/api/#dom

        """
        return self.get_text(f"/dom/{uuid}/")

    def get_response(self, file_hash: str) -> str:
        """Get a (Script|Document|Fetch|XHR) response in plain text format by SHA256 hash.

        Args:
            file_hash (str): SHA256 hash of the response.

        Returns:
            str: Response content as a string.

        Reference:
            https://docs.urlscan.io/apis/urlscan-openapi/scanning/response

        """
        return self.get_text(f"/responses/{file_hash}/")

    def search(
        self,
        q: str = "",
        size: int = 100,
        limit: int | None = None,
        search_after: str | None = None,
        datasource: SearchDataSource | None = None,
        collapse: str | None = None,
    ) -> SearchIterator:
        """Search.

        Args:
            q (str): Query term. Defaults to "".
            size (int, optional): Number of results returned in a search. Defaults to 100.
            limit (int | None, optional): Maximum number of results that will be returned by the iterator. Defaults to None.
            search_after (str | None, optional): Search after to retrieve next results. Defaults to None.
            datasource (SearchDataSource | None, optional): Datasources to search: scans (urlscan.io), hostnames, incidents, notifications, certificates (urlscan Pro). Defaults to None.
            collapse (str | None, optional): Field to collapse results on. Only works on current page of results. Defaults to None.

        Returns:
            SearchIterator: Search iterator.

        Reference:
            https://urlscan.io/docs/api/#search

        """
        return SearchIterator(
            self,
            path="/api/v1/search/",
            q=q,
            size=size,
            limit=limit,
            search_after=search_after,
            datasource=datasource,
            collapse=collapse,
        )

    def scan(
        self,
        url: str,
        *,
        visibility: VisibilityType,
        tags: list[str] | None = None,
        customagent: str | None = None,
        referer: str | None = None,
        override_safety: Any = None,
        country: str | None = None,
    ) -> dict:
        """Scan a given URL.

        Args:
            url (str): URL to scan.
            visibility (VisibilityType): Visibility of the scan. Can be "public", "private", or "unlisted".
            tags (list[str] | None, optional): Tags to be attached. Defaults to None.
            customagent (str | None, optional): Custom user agent. Defaults to None.
            referer (str | None, optional): Referer. Defaults to None.
            override_safety (Any, optional): If set to any value, this will disable reclassification of URLs with potential PII in them. Defaults to None.
            country (str | None, optional): Specify which country the scan should be performed from (2-letter ISO 3166-1 alpha-2 country code). Defaults to None.

        Returns:
            dict: Scan response.

        Reference:
            https://urlscan.io/docs/api/#scan

        """
        data = _compact(
            {
                "url": url,
                "tags": tags,
                "visibility": visibility,
                "customagent": customagent,
                "referer": referer,
                "overrideSafety": override_safety,
                "country": country,
            }
        )
        res = self._post("/api/v1/scan/", json=data)
        json_res = self._response_to_json(res)

        # Warn when the response reports a visibility other than the requested one.
        json_visibility = json_res.get("visibility")
        if json_visibility is not None and json_visibility != visibility:
            logger.warning(f"Visibility is enforced to {json_visibility}.")

        # memoize the scan UUID & timestamp; wait_for_result() reads this to
        # work out how much of its initial wait has already elapsed
        uuid = json_res.get("uuid")
        if isinstance(uuid, str):
            self._scan_uuid_timestamp_memo[uuid] = time.time()

        return json_res

    def bulk_scan(
        self,
        urls: list[str],
        *,
        visibility: VisibilityType,
        tags: list[str] | None = None,
        customagent: str | None = None,
        referer: str | None = None,
        override_safety: Any = None,
        country: str | None = None,
    ) -> list[tuple[str, dict | Exception]]:
        """Scan multiple URLs in bulk.

        Each URL is submitted with its own ``scan()`` call, in the order given.
        An exception raised for one URL is returned in that URL's slot instead
        of being raised, so the remaining URLs are still submitted.

        Args:
            urls (list[str]): List of URLs to scan.
            visibility (VisibilityType): Visibility of the scan. Can be "public", "private", or "unlisted".
            tags (list[str] | None, optional): Tags to be attached. Defaults to None.
            customagent (str | None, optional): Custom user agent. Defaults to None.
            referer (str | None, optional): Referer. Defaults to None.
            override_safety (Any, optional): If set to any value, this will disable reclassification of URLs with potential PII in them. Defaults to None.
            country (str | None, optional): Specify which country the scan should be performed from (2-letter ISO 3166-1 alpha-2 country code). Defaults to None.

        Returns:
            list[tuple[str, dict | Exception]]: A list of tuples of (url, scan response or error).

        Reference:
            https://urlscan.io/docs/api/#scan

        """

        def inner(url: str) -> dict | Exception:
            # Deliberately broad: a failure is reported per URL, not raised.
            try:
                return self.scan(
                    url,
                    visibility=visibility,
                    tags=tags,
                    customagent=customagent,
                    referer=referer,
                    override_safety=override_safety,
                    country=country,
                )
            except Exception as e:
                return e

        return [(url, inner(url)) for url in urls]

    def wait_for_result(
        self,
        uuid: str,
        timeout: float = 60.0,
        interval: float = 1.0,
        initial_wait: float | None = 10.0,
    ) -> None:
        """Wait for a scan result to be available.

        Sends HEAD requests to the result endpoint until one answers with
        HTTP 200. The timeout is measured from the first poll, i.e. it does
        not include the initial wait.

        Args:
            uuid (str): UUID of a result.
            timeout (float, optional): Timeout in seconds. Defaults to 60.0.
            interval (float, optional): Interval in seconds. Defaults to 1.0.
            initial_wait (float | None, optional): Initial wait time in seconds. Set None to disable. Defaults to 10.0.

        Raises:
            TimeoutError: If the result is still unavailable once more than
                ``timeout`` seconds of polling have passed.

        """
        session = self._get_session()
        req = session.build_request("HEAD", f"/api/v1/result/{uuid}/")

        # The memo stores wall-clock (time.time()) submission timestamps, so the
        # remaining initial wait has to be computed on the wall clock as well.
        scanned_at = self._scan_uuid_timestamp_memo.get(uuid)
        if scanned_at and initial_wait:
            elapsed = time.time() - scanned_at
            if elapsed < initial_wait:
                time.sleep(initial_wait - elapsed)

        # Measure the polling deadline on the monotonic clock so that system
        # clock adjustments can neither cut the wait short nor prolong it.
        start_time = time.monotonic()
        while True:
            res = self._send_request(session, req)
            if res.status_code == 200:
                # The result exists; the submission timestamp is no longer needed.
                self._scan_uuid_timestamp_memo.pop(uuid, None)
                return

            if time.monotonic() - start_time > timeout:
                raise TimeoutError("Timeout waiting for scan result.")

            time.sleep(interval)

    def scan_and_get_result(
        self,
        url: str,
        visibility: VisibilityType,
        tags: list[str] | None = None,
        customagent: str | None = None,
        referer: str | None = None,
        override_safety: Any = None,
        country: str | None = None,
        timeout: float = 60.0,
        interval: float = 1.0,
        initial_wait: float | None = 10.0,
    ) -> dict:
        """Scan a given URL, wait for a result and get it.

        Args:
            url (str): URL to scan.
            visibility (VisibilityType): Visibility of the scan. Can be "public", "private", or "unlisted".
            tags (list[str] | None, optional): Tags to be attached. Defaults to None.
            customagent (str | None, optional): Custom user agent. Defaults to None.
            referer (str | None, optional): Referer. Defaults to None.
            override_safety (Any, optional): If set to any value, this will disable reclassification of URLs with potential PII in them. Defaults to None.
            country (str | None, optional): Specify which country the scan should be performed from (2-letter ISO 3166-1 alpha-2 country code). Defaults to None.
            timeout (float, optional): Timeout for waiting a result in seconds. Defaults to 60.0.
            interval (float, optional): Interval in seconds. Defaults to 1.0.
            initial_wait (float | None, optional): Initial wait time in seconds. Set None to disable. Defaults to 10.0.

        Returns:
            dict: Scan result.

        Raises:
            TimeoutError: If the result does not become available within ``timeout``.

        Reference:
            https://urlscan.io/docs/api/#scan

        """
        res = self.scan(
            url,
            visibility=visibility,
            tags=tags,
            customagent=customagent,
            referer=referer,
            override_safety=override_safety,
            country=country,
        )
        uuid: str = res["uuid"]
        self.wait_for_result(
            uuid, timeout=timeout, interval=interval, initial_wait=initial_wait
        )
        return self.get_result(uuid)

    def bulk_scan_and_get_results(
        self,
        urls: list[str],
        visibility: VisibilityType,
        tags: list[str] | None = None,
        customagent: str | None = None,
        referer: str | None = None,
        override_safety: Any = None,
        country: str | None = None,
        timeout: float = 60.0,
        interval: float = 1.0,
        initial_wait: float | None = 10.0,
    ) -> list[tuple[str, dict | Exception]]:
        """Scan URLs, wait for results and get them.

        All URLs are submitted first; the results are then awaited and fetched
        one URL at a time, each wait using its own ``timeout``. A failure while
        submitting, waiting for or fetching one URL is returned in that URL's
        slot and does not affect the other URLs.

        Args:
            urls (list[str]): URLs to scan.
            visibility (VisibilityType): Visibility of the scan. Can be "public", "private", or "unlisted".
            tags (list[str] | None, optional): Tags to be attached. Defaults to None.
            customagent (str | None, optional): Custom user agent. Defaults to None.
            referer (str | None, optional): Referer. Defaults to None.
            override_safety (Any, optional): If set to any value, this will disable reclassification of URLs with potential PII in them. Defaults to None.
            country (str | None, optional): Specify which country the scan should be performed from (2-letter ISO 3166-1 alpha-2 country code). Defaults to None.
            timeout (float, optional): Timeout for waiting a result in seconds. Defaults to 60.0.
            interval (float, optional): Interval in seconds. Defaults to 1.0.
            initial_wait (float | None, optional): Initial wait time in seconds. Set None to disable. Defaults to 10.0.

        Returns:
            list[tuple[str, dict | Exception]]: A list of tuples of (url, result or error).

        Reference:
            https://urlscan.io/docs/api/#scan

        """
        responses = self.bulk_scan(
            urls,
            visibility=visibility,
            tags=tags,
            customagent=customagent,
            referer=referer,
            override_safety=override_safety,
            country=country,
        )

        def mapping(res_or_error: dict | Exception) -> dict | Exception:
            if isinstance(res_or_error, Exception):
                return res_or_error

            # Mirror bulk_scan(): report a timeout, request error or malformed
            # scan response for this URL instead of discarding every other result.
            try:
                uuid: str = res_or_error["uuid"]
                self.wait_for_result(
                    uuid, timeout=timeout, interval=interval, initial_wait=initial_wait
                )
                return self.get_result(uuid)
            except Exception as e:
                return e

        return [(url, mapping(res_or_error)) for url, res_or_error in responses]

    def get_available_countries(self) -> dict:
        """Retrieve countries available for scanning using the Scan API.

        Returns:
            dict: Available countries.

        Reference:
            https://docs.urlscan.io/apis/urlscan-openapi/scanning/availablecountries

        """
        return self.get_json("/api/v1/availableCountries")

    def get_user_agents(self) -> dict:
        """Get grouped user agents to use with the Scan API.

        Returns:
            dict: Available user agents.

        Reference:
            https://docs.urlscan.io/apis/urlscan-openapi/scanning/useragents

        """
        return self.get_json("/api/v1/userAgents")

    def get_quotas(self) -> dict:
        """Get available and used API quotas.

        Returns:
            dict: API quotas.

        Reference:
            https://docs.urlscan.io/apis/urlscan-openapi/generic/quotas

        """
        return self.get_json("/api/v1/quotas")

bulk_scan(urls, *, visibility, tags=None, customagent=None, referer=None, override_safety=None, country=None)

Scan multiple URLs in bulk.

Parameters:

Name Type Description Default
urls list[str]

List of URLs to scan.

required
visibility VisibilityType

Visibility of the scan. Can be "public", "private", or "unlisted".

required
tags list[str] | None

Tags to be attached. Defaults to None.

None
customagent str | None

Custom user agent. Defaults to None.

None
referer str | None

Referer. Defaults to None.

None
override_safety Any

If set to any value, this will disable reclassification of URLs with potential PII in them. Defaults to None.

None
country str | None

Specify which country the scan should be performed from (2-letter ISO 3166-1 alpha-2 country code). Defaults to None.

None

Returns:

Type Description
list[tuple[str, dict | Exception]]

list[tuple[str, dict | Exception]]: A list of tuples of (url, scan response or error).

Reference

https://urlscan.io/docs/api/#scan

Source code in src/urlscan/client.py
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
def bulk_scan(
    self,
    urls: list[str],
    *,
    visibility: VisibilityType,
    tags: list[str] | None = None,
    customagent: str | None = None,
    referer: str | None = None,
    override_safety: Any = None,
    country: str | None = None,
) -> list[tuple[str, dict | Exception]]:
    """Submit several URLs for scanning, one ``scan()`` call per URL.

    URLs are processed in the order given. When ``scan()`` raises for a URL,
    the exception is stored in that URL's slot and processing continues.

    Args:
        urls (list[str]): List of URLs to scan.
        visibility (VisibilityType): Visibility of the scan. Can be "public", "private", or "unlisted".
        tags (list[str] | None, optional): Tags to be attached. Defaults to None.
        customagent (str | None, optional): Custom user agent. Defaults to None.
        referer (str | None, optional): Referer. Defaults to None.
        override_safety (Any, optional): If set to any value, this will disable reclassification of URLs with potential PII in them. Defaults to None.
        country (str | None, optional): Specify which country the scan should be performed from (2-letter ISO 3166-1 alpha-2 country code). Defaults to None.

    Returns:
        list[tuple[str, dict | Exception]]: A list of tuples of (url, scan response or error).

    Reference:
        https://urlscan.io/docs/api/#scan

    """
    # The same options are forwarded unchanged for every URL.
    scan_options = {
        "visibility": visibility,
        "tags": tags,
        "customagent": customagent,
        "referer": referer,
        "override_safety": override_safety,
        "country": country,
    }

    outcomes: list[tuple[str, dict | Exception]] = []
    for target in urls:
        try:
            outcome: dict | Exception = self.scan(target, **scan_options)
        except Exception as exc:
            # Keep the failure alongside its URL rather than aborting the batch.
            outcome = exc
        outcomes.append((target, outcome))
    return outcomes

bulk_scan_and_get_results(urls, visibility, tags=None, customagent=None, referer=None, override_safety=None, country=None, timeout=60.0, interval=1.0, initial_wait=10.0)

Scan URLs, wait for results and get them.

Parameters:

Name Type Description Default
urls list[str]

URLs to scan.

required
visibility VisibilityType

Visibility of the scan. Can be "public", "private", or "unlisted".

required
tags list[str] | None

Tags to be attached. Defaults to None.

None
customagent str | None

Custom user agent. Defaults to None.

None
referer str | None

Referer. Defaults to None.

None
override_safety Any

If set to any value, this will disable reclassification of URLs with potential PII in them. Defaults to None.

None
country str | None

Specify which country the scan should be performed from (2-letter ISO 3166-1 alpha-2 country code). Defaults to None.

None
timeout float

Timeout for waiting a result in seconds. Defaults to 60.0.

60.0
interval float

Interval in seconds. Defaults to 1.0.

1.0
initial_wait float | None

Initial wait time in seconds. Set None to disable. Defaults to 10.0.

10.0

Returns:

Type Description
list[tuple[str, dict | Exception]]

list[tuple[str, dict | Exception]]: A list of tuples of (url, result or error).

Reference

https://urlscan.io/docs/api/#scan

Source code in src/urlscan/client.py
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
def bulk_scan_and_get_results(
    self,
    urls: list[str],
    visibility: VisibilityType,
    tags: list[str] | None = None,
    customagent: str | None = None,
    referer: str | None = None,
    override_safety: Any = None,
    country: str | None = None,
    timeout: float = 60.0,
    interval: float = 1.0,
    initial_wait: float | None = 10.0,
) -> list[tuple[str, dict | Exception]]:
    """Scan URLs, wait for results and get them.

    All URLs are submitted first; the results are then awaited and fetched
    one URL at a time, each wait using its own ``timeout``. A failure while
    submitting, waiting for or fetching one URL is returned in that URL's
    slot and does not affect the other URLs.

    Args:
        urls (list[str]): URLs to scan.
        visibility (VisibilityType): Visibility of the scan. Can be "public", "private", or "unlisted".
        tags (list[str] | None, optional): Tags to be attached. Defaults to None.
        customagent (str | None, optional): Custom user agent. Defaults to None.
        referer (str | None, optional): Referer. Defaults to None.
        override_safety (Any, optional): If set to any value, this will disable reclassification of URLs with potential PII in them. Defaults to None.
        country (str | None, optional): Specify which country the scan should be performed from (2-letter ISO 3166-1 alpha-2 country code). Defaults to None.
        timeout (float, optional): Timeout for waiting a result in seconds. Defaults to 60.0.
        interval (float, optional): Interval in seconds. Defaults to 1.0.
        initial_wait (float | None, optional): Initial wait time in seconds. Set None to disable. Defaults to 10.0.

    Returns:
        list[tuple[str, dict | Exception]]: A list of tuples of (url, result or error).

    Reference:
        https://urlscan.io/docs/api/#scan

    """
    responses = self.bulk_scan(
        urls,
        visibility=visibility,
        tags=tags,
        customagent=customagent,
        referer=referer,
        override_safety=override_safety,
        country=country,
    )

    def mapping(res_or_error: dict | Exception) -> dict | Exception:
        if isinstance(res_or_error, Exception):
            return res_or_error

        # Report a timeout, request error or malformed scan response for this
        # URL in its own slot instead of discarding every other result.
        try:
            uuid: str = res_or_error["uuid"]
            self.wait_for_result(
                uuid, timeout=timeout, interval=interval, initial_wait=initial_wait
            )
            return self.get_result(uuid)
        except Exception as e:
            return e

    return [(url, mapping(res_or_error)) for url, res_or_error in responses]

get_available_countries()

Retrieve countries available for scanning using the Scan API.

Returns:

Name Type Description
dict dict

Available countries.

Reference

https://docs.urlscan.io/apis/urlscan-openapi/scanning/availablecountries

Source code in src/urlscan/client.py
793
794
795
796
797
798
799
800
801
802
803
def get_available_countries(self) -> dict:
    """List the countries a scan can be performed from.

    Returns:
        dict: Available countries.

    Reference:
        https://docs.urlscan.io/apis/urlscan-openapi/scanning/availablecountries

    """
    endpoint = "/api/v1/availableCountries"
    countries = self.get_json(endpoint)
    return countries

get_dom(uuid)

Get a DOM of a scan by UUID.

Parameters:

Name Type Description Default
uuid str

UUID

required

Returns:

Name Type Description
str str

DOM as a string.

Reference

https://urlscan.io/docs/api/#dom

Source code in src/urlscan/client.py
484
485
486
487
488
489
490
491
492
493
494
495
496
497
def get_dom(self, uuid: str) -> str:
    """Fetch the DOM captured for a scan.

    Args:
        uuid (str): UUID of the scan.

    Returns:
        str: DOM as a string.

    Reference:
        https://urlscan.io/docs/api/#dom

    """
    endpoint = f"/dom/{uuid}/"
    dom_text = self.get_text(endpoint)
    return dom_text

get_quotas()

Get available and used API quotas.

Returns:

Name Type Description
dict dict

API quotas.

Reference

https://docs.urlscan.io/apis/urlscan-openapi/generic/quotas

Source code in src/urlscan/client.py
817
818
819
820
821
822
823
824
825
826
827
def get_quotas(self) -> dict:
    """Fetch the available and used API quotas.

    Returns:
        dict: API quotas.

    Reference:
        https://docs.urlscan.io/apis/urlscan-openapi/generic/quotas

    """
    endpoint = "/api/v1/quotas"
    quotas = self.get_json(endpoint)
    return quotas

get_response(file_hash)

Get a (Script|Document|Fetch|XHR) response in plain text format by SHA256 hash.

Parameters:

Name Type Description Default
file_hash str

SHA256 hash of the response.

required

Returns:

Name Type Description
str str

Response content as a string.

Reference

https://docs.urlscan.io/apis/urlscan-openapi/scanning/response

Source code in src/urlscan/client.py
499
500
501
502
503
504
505
506
507
508
509
510
511
512
def get_response(self, file_hash: str) -> str:
    """Fetch a (Script|Document|Fetch|XHR) response as plain text by its SHA256 hash.

    Args:
        file_hash (str): SHA256 hash of the response.

    Returns:
        str: Response content as a string.

    Reference:
        https://docs.urlscan.io/apis/urlscan-openapi/scanning/response

    """
    endpoint = f"/responses/{file_hash}/"
    body = self.get_text(endpoint)
    return body

get_result(uuid)

Get a result of a scan by UUID.

Parameters:

Name Type Description Default
uuid str

UUID.

required

Returns:

Name Type Description
dict dict

Scan result.

Reference

https://urlscan.io/docs/api/#result

Source code in src/urlscan/client.py
451
452
453
454
455
456
457
458
459
460
461
462
463
464
def get_result(self, uuid: str) -> dict:
    """Fetch the result of a scan.

    Args:
        uuid (str): UUID of the scan.

    Returns:
        dict: Scan result.

    Reference:
        https://urlscan.io/docs/api/#result

    """
    endpoint = f"/api/v1/result/{uuid}/"
    result = self.get_json(endpoint)
    return result

get_screenshot(uuid)

Get a screenshot of a scan by UUID.

Parameters:

Name Type Description Default
uuid str

UUID.

required

Returns:

Type Description
BytesIO

Screenshot (img/png) as bytes.

Reference

https://urlscan.io/docs/api/#screenshot

Source code in src/urlscan/client.py
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
def get_screenshot(self, uuid: str) -> BytesIO:
    """Fetch the screenshot taken for a scan.

    Args:
        uuid (str): UUID of the scan.

    Returns:
        BytesIO: Screenshot (img/png) as an in-memory binary stream. Its
            ``name`` attribute is set to the basename reported by the response.

    Reference:
        https://urlscan.io/docs/api/#screenshot

    """
    response = self._get(f"/screenshots/{uuid}.png")
    screenshot = BytesIO(response.content)
    # Label the stream with the file name carried by the response.
    setattr(screenshot, "name", response.basename)
    return screenshot

get_user_agents()

Get grouped user agents to use with the Scan API.

Returns:

Name Type Description
dict dict

Available user agents.

Reference

https://docs.urlscan.io/apis/urlscan-openapi/scanning/useragents

Source code in src/urlscan/client.py
805
806
807
808
809
810
811
812
813
814
815
def get_user_agents(self) -> dict:
    """Fetch the grouped user agents that can be used with the Scan API.

    Returns:
        dict: Available user agents.

    Reference:
        https://docs.urlscan.io/apis/urlscan-openapi/scanning/useragents

    """
    endpoint = "/api/v1/userAgents"
    user_agents = self.get_json(endpoint)
    return user_agents

scan(url, *, visibility, tags=None, customagent=None, referer=None, override_safety=None, country=None)

Scan a given URL.

Parameters:

Name Type Description Default
url str

URL to scan.

required
visibility VisibilityType

Visibility of the scan. Can be "public", "private", or "unlisted".

required
tags list[str] | None

Tags to be attached. Defaults to None.

None
customagent str | None

Custom user agent. Defaults to None.

None
referer str | None

Referer. Defaults to None.

None
override_safety Any

If set to any value, this will disable reclassification of URLs with potential PII in them. Defaults to None.

None
country str | None

Specify which country the scan should be performed from (2-letter ISO 3166-1 alpha-2 country code). Defaults to None.

None

Returns:

Name Type Description
dict dict

Scan response.

Reference

https://urlscan.io/docs/api/#scan

Source code in src/urlscan/client.py
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
def scan(
    self,
    url: str,
    *,
    visibility: VisibilityType,
    tags: list[str] | None = None,
    customagent: str | None = None,
    referer: str | None = None,
    override_safety: Any = None,
    country: str | None = None,
) -> dict:
    """Submit a URL for scanning.

    Args:
        url (str): URL to scan.
        visibility (VisibilityType): Visibility of the scan. Can be "public", "private", or "unlisted".
        tags (list[str] | None, optional): Tags to be attached. Defaults to None.
        customagent (str | None, optional): Custom user agent. Defaults to None.
        referer (str | None, optional): Referer. Defaults to None.
        override_safety (Any, optional): If set to any value, this will disable reclassification of URLs with potential PII in them. Defaults to None.
        country (str | None, optional): Specify which country the scan should be performed from (2-letter ISO 3166-1 alpha-2 country code). Defaults to None.

    Returns:
        dict: Scan response.

    Reference:
        https://urlscan.io/docs/api/#scan

    """
    raw_payload = {
        "url": url,
        "tags": tags,
        "visibility": visibility,
        "customagent": customagent,
        "referer": referer,
        "overrideSafety": override_safety,
        "country": country,
    }
    payload = _compact(raw_payload)
    response = self._post("/api/v1/scan/", json=payload)
    body = self._response_to_json(response)

    # Warn when the response reports a visibility other than the requested one.
    enforced = body.get("visibility")
    visibility_changed = enforced is not None and enforced != visibility
    if visibility_changed:
        logger.warning(f"Visibility is enforced to {enforced}.")

    # Remember when this scan was submitted, keyed by its UUID.
    scan_id = body.get("uuid")
    if isinstance(scan_id, str):
        self._scan_uuid_timestamp_memo[scan_id] = time.time()

    return body

scan_and_get_result(url, visibility, tags=None, customagent=None, referer=None, override_safety=None, country=None, timeout=60.0, interval=1.0, initial_wait=10.0)

Scan a given URL, wait for a result and get it.

Parameters:

Name Type Description Default
url str

URL to scan.

required
visibility VisibilityType

Visibility of the scan. Can be "public", "private", or "unlisted".

required
tags list[str] | None

Tags to be attached. Defaults to None.

None
customagent str | None

Custom user agent. Defaults to None.

None
referer str | None

Referer. Defaults to None.

None
override_safety Any

If set to any value, this will disable reclassification of URLs with potential PII in them. Defaults to None.

None
country str | None

Specify which country the scan should be performed from (2-letter ISO 3166-1 alpha-2 country code). Defaults to None.

None
timeout float

Timeout for waiting a result in seconds. Defaults to 60.0.

60.0
interval float

Interval in seconds. Defaults to 1.0.

1.0
initial_wait float | None

Initial wait time in seconds. Set None to disable. Defaults to 10.0.

10.0

Returns:

Name Type Description
dict dict

Scan result.

Reference

https://urlscan.io/docs/api/#scan

Source code in src/urlscan/client.py
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
def scan_and_get_result(
    self,
    url: str,
    visibility: VisibilityType,
    tags: list[str] | None = None,
    customagent: str | None = None,
    referer: str | None = None,
    override_safety: Any = None,
    country: str | None = None,
    timeout: float = 60.0,
    interval: float = 1.0,
    initial_wait: float | None = 10.0,
) -> dict:
    """Submit a URL for scanning, block until the result exists, then fetch it.

    This chains three calls on the client: ``scan`` to submit the URL,
    ``wait_for_result`` to wait for the result, and ``get_result`` to
    retrieve it.

    Args:
        url (str): URL to scan.
        visibility (VisibilityType): Visibility of the scan. Can be "public", "private", or "unlisted".
        tags (list[str] | None, optional): Tags to be attached. Defaults to None.
        customagent (str | None, optional): Custom user agent. Defaults to None.
        referer (str | None, optional): Referer. Defaults to None.
        override_safety (Any, optional): If set to any value, this will disable reclassification of URLs with potential PII in them. Defaults to None.
        country (str | None, optional): Country the scan should be performed from (2-letter ISO 3166-1 alpha-2 country code). Defaults to None.
        timeout (float, optional): Timeout in seconds for waiting for the result. Defaults to 60.0.
        interval (float, optional): Interval in seconds, forwarded to ``wait_for_result``. Defaults to 1.0.
        initial_wait (float | None, optional): Initial wait time in seconds. Set None to disable. Defaults to 10.0.

    Returns:
        dict: Scan result.

    Reference:
        https://urlscan.io/docs/api/#scan

    """
    # Everything except the URL itself is forwarded to scan() by keyword.
    scan_options = {
        "visibility": visibility,
        "tags": tags,
        "customagent": customagent,
        "referer": referer,
        "override_safety": override_safety,
        "country": country,
    }
    submission = self.scan(url, **scan_options)

    # The submission response carries the UUID that identifies the result.
    scan_uuid: str = submission["uuid"]
    self.wait_for_result(
        scan_uuid,
        timeout=timeout,
        interval=interval,
        initial_wait=initial_wait,
    )
    return self.get_result(scan_uuid)

search(q='', size=100, limit=None, search_after=None, datasource=None, collapse=None)

Search.

Parameters:

Name Type Description Default
q str

Query term. Defaults to "".

''
size int

Number of results returned in a search. Defaults to 100.

100
limit int | None

Maximum number of results that will be returned by the iterator. Defaults to None.

None
search_after str | None

Search after to retrieve next results. Defaults to None.

None
datasource SearchDataSource | None

Datasources to search: scans (urlscan.io), hostnames, incidents, notifications, certificates (urlscan Pro). Defaults to None.

None
collapse str | None

Field to collapse results on. Only works on current page of results. Defaults to None.

None

Returns:

Name Type Description
SearchIterator SearchIterator

Search iterator.

Reference

https://urlscan.io/docs/api/#search

Source code in src/urlscan/client.py
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
def search(
    self,
    q: str = "",
    size: int = 100,
    limit: int | None = None,
    search_after: str | None = None,
    datasource: SearchDataSource | None = None,
    collapse: str | None = None,
) -> SearchIterator:
    """Create an iterator over search results for a query.

    Args:
        q (str): Query term. Defaults to "".
        size (int, optional): Number of results returned in a search. Defaults to 100.
        limit (int | None, optional): Maximum number of results that will be returned by the iterator. Defaults to None.
        search_after (str | None, optional): Search after to retrieve next results. Defaults to None.
        datasource (SearchDataSource | None, optional): Datasources to search: scans (urlscan.io), hostnames, incidents, notifications, certificates (urlscan Pro). Defaults to None.
        collapse (str | None, optional): Field to collapse results on. Only works on current page of results. Defaults to None.

    Returns:
        SearchIterator: Search iterator bound to this client and the search endpoint.

    Reference:
        https://urlscan.io/docs/api/#search

    """
    # All search parameters are handed to the iterator unchanged.
    query_options = {
        "q": q,
        "size": size,
        "limit": limit,
        "search_after": search_after,
        "datasource": datasource,
        "collapse": collapse,
    }
    return SearchIterator(self, path="/api/v1/search/", **query_options)

wait_for_result(uuid, timeout=60.0, interval=1.0, initial_wait=10.0)

Wait for a scan result to be available.

Parameters:

Name Type Description Default
uuid str

UUID of a result.

required
timeout float

Timeout in seconds. Defaults to 60.0.

60.0
interval float

Interval in seconds. Defaults to 1.0.

1.0
initial_wait float | None

Initial wait time in seconds. Set None to disable. Defaults to 10.0.

10.0
Source code in src/urlscan/client.py
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
def wait_for_result(
    self,
    uuid: str,
    timeout: float = 60.0,
    interval: float = 1.0,
    initial_wait: float | None = 10.0,
) -> None:
    """Wait for a scan result to be available.

    Args:
        uuid (str): UUID of a result.
        timeout (float, optional): Timeout in seconds. Defaults to 60.0.
        interval (float, optional): Interval in seconds. Defaults to 1.0.
        initial_wait (float | None, optional): Initial wait time in seconds. Set None to disable. Defaults to 10.0.

    """
    session = self._get_session()
    req = session.build_request("HEAD", f"/api/v1/result/{uuid}/")

    scanned_at = self._scan_uuid_timestamp_memo.get(uuid)
    if scanned_at and initial_wait:
        elapsed = time.time() - scanned_at
        if elapsed < initial_wait:
            time.sleep(initial_wait - elapsed)

    start_time = time.time()
    while True:
        res = self._send_request(session, req)
        if res.status_code == 200:
            self._scan_uuid_timestamp_memo.pop(uuid, None)
            return

        if time.time() - start_time > timeout:
            raise TimeoutError("Timeout waiting for scan result.")

        time.sleep(interval)