Skip to content

Utils

urlscan.utils

Utility functions for urlscan.io API client.

extract(path, outdir)

Extract a compressed file to the specified output directory.

Parameters:

Name Type Description Default
path StrOrBytesPath

The path to the compressed file.

required
outdir StrOrBytesPath

The directory to extract the files to.

required

Returns:

Type Description

None

Source code in src/urlscan/utils.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def extract(path: StrOrBytesPath, outdir: StrOrBytesPath) -> None:
    """Extract a compressed file to the specified output directory.

    A file whose name ends with ``.tar.gz`` is unpacked as a tar archive into
    ``outdir``. Any other file whose name ends with ``.gz`` is decompressed to
    a single file in ``outdir`` named after the input minus the ``.gz`` suffix.

    Args:
        path (StrOrBytesPath): The path to the compressed file.
        outdir (StrOrBytesPath): The directory to extract the files to.

    Returns:
        None

    Raises:
        ValueError: If the file name ends with neither ``.tar.gz`` nor ``.gz``.

    """
    import shutil

    # os.fsdecode handles str, bytes and os.PathLike uniformly. str() on a
    # bytes path would yield its repr ("b'...'"), which breaks both the suffix
    # checks and the output path.
    src = os.fsdecode(path)
    dest_dir = os.fsdecode(outdir)
    basename = os.path.basename(src)

    # Check ".tar.gz" first: such names also end with ".gz".
    if basename.endswith(".tar.gz"):
        with tarfile.open(src, mode="r:*", ignore_zeros=True) as tar:
            if hasattr(tarfile, "data_filter"):
                # Refuse members that would land outside dest_dir (absolute
                # paths, "..", unsafe links) on interpreters that support
                # extraction filters.
                tar.extractall(dest_dir, filter="data")
            else:
                tar.extractall(dest_dir)

        return

    if basename.endswith(".gz"):
        filename = basename.removesuffix(".gz")

        with (
            gzip.open(src, "rb") as f_in,
            open(os.path.join(dest_dir, filename), "wb") as f_out,
        ):
            # Stream in chunks so large files are never fully held in memory.
            shutil.copyfileobj(f_in, f_out)

        return

    raise ValueError(f"Unsupported file type: {basename}")