Skip to content

Reference#

defensive_file_scanner.create_file#

Generate a test file with dummy bytes and optional bloat to simulate malware.

main(filepath, start=1000, bloat=1000, end=0) #

Write a file with random bits as the 'program' and optional bloat.

Source code in defensive_file_scanner/create_file.py
@click.command()
@click.option(
    "-f",
    "--filepath",
    type=click.Path(path_type=Path),
    default=Path("./testing/file.out"),
    help="Filepath to save the file. Default = './testing/file.out'",
)
@click.option(
    "-s",
    "--start",
    default=1_000,
    help="Number of hex digits in the simulated program at the start of file.",
)
@click.option(
    "-b",
    "--bloat",
    default=1_000,
    help=(  # fmt: off
        # The backslash is escaped so the help text shows the four
        # characters \x00 instead of embedding a real NUL character.
        "Number of bloat hex digits or '\\x00' (which would be 2) in the file."
    ),  # fmt: on
)
@click.option(
    "-e",
    "--end",
    default=0,
    help="Number of hex digits in the simulated program at the end of file.",
)
def main(filepath: Path, start: int = 1_000, bloat: int = 1_000, end: int = 0):
    """
    Write a file with random bits as the 'program' and optional bloat.
    """
    # NOTE: click shows the docstring above as the command's help text, so
    # implementation notes live in comments instead.
    #
    # filepath: destination of the generated file.
    # start / end: forwarded to write_file as program_start / program_end.
    # bloat: forwarded to write_file as the bloat length.
    # Returns 0 after the file has been written and its size logged.
    logger.info(f"Creating file: {filepath.resolve()}")
    write_file(filepath, start, bloat, end)

    # Pick the largest unit that keeps the displayed number below 1,000.
    size = filepath.stat().st_size
    size_str: str
    if size < 1_000:
        size_str = f"{size:,.2f} B"
    elif size < (1_000 * 1024):
        size_str = f"{size / 1024:,.2f} KB"
    elif size < (1_000 * 1024**2):
        size_str = f"{size / 1024 ** 2:,.2f} MB"
    else:
        size_str = f"{size / 1024 ** 3:,.2f} GB"
    logger.info(f"File created of size: {size_str}")
    return 0

random_bytes(length=1000) #

Generate random bytes in hex format.

Parameters:

Name Type Description Default
length int

Length of random hex digits.

1000

Returns:

Name Type Description
bytes bytes

A byte string of hex digits of a given length.

Source code in defensive_file_scanner/create_file.py
def random_bytes(length: int = 1000) -> bytes:
    """
    Generate random bytes from a random string of hex digits.

    Args:
        length (int): Number of random hex digits to generate. Two hex
            digits encode one byte, so a positive length must be even.

    Returns:
        bytes: ``length // 2`` random bytes (empty if ``length <= 0``).

    Raises:
        ValueError: If ``length`` is a positive odd number.
    """
    # bytes.fromhex needs complete digit pairs; fail early with a message
    # that names the real problem instead of its opaque parse error.
    if length > 0 and length % 2:
        raise ValueError(
            f"length must be an even number of hex digits, got {length}"
        )
    # Draw from the 16 lowercase digits only: string.hexdigits also lists
    # 'A'-'F', which would make the nibbles a-f twice as likely as 0-9.
    program = "".join(random.choices("0123456789abcdef", k=length))
    return bytes.fromhex(program)

repeat_function(func, *args, repeats=1000, **kwargs) #

Run a function n times with the given positional and keyword arguments.

Parameters:

Name Type Description Default
func Callable[[...], ...]

Function to run.

required
*args Any

Positional arguments for the function.

()
repeats int

Number of times to repeat the function call.

1000
**kwargs Any

Keyword arguments for the function.

{}

Returns:

Name Type Description
Generator Generator

A generator that calls the function n times.

Yields:

Name Type Description
Any Generator

Result from the function call.

Source code in defensive_file_scanner/create_file.py
def repeat_function(
    func: Callable[..., Any],
    *args: Any,
    repeats: int = 1_000,
    **kwargs: Any,
) -> Generator:
    """
    Lazily call a function a fixed number of times with the same arguments.

    Args:
        func (Callable[[...], ...]): Function to call.
        *args (Any): Positional arguments passed to every call.
        repeats (int): How many times the function is called.
        **kwargs (Any): Keyword arguments passed to every call.

    Returns:
        Generator: A generator that performs one call per item consumed.

    Yields:
        Any: The result of each individual call.
    """
    # Delegating to a generator expression keeps the calls lazy: nothing
    # runs until the caller starts iterating.
    yield from (func(*args, **kwargs) for _ in range(repeats))

write_file(path, program_start=100000, bloat=100000, program_end=0) #

Writes the hex code for a test file.

Parameters:

Name Type Description Default
path Path

A file path for the file.

required
program_start int

Length of the "program" code at start of file.

100000
bloat int

Length of bloat or no operation bytes '\x00' in the file.

100000
program_end int

Length of the "program" code at end of file.

0

Returns:

Type Description
None

None

Source code in defensive_file_scanner/create_file.py
def write_file(
    path: Path,
    program_start: int = 100_000,
    bloat: int = 100_000,
    program_end: int = 0,
) -> None:
    """
    Write a test file made of random "program" bytes around null-byte bloat.

    The file layout is: random bytes, then bloat, then random bytes. Any
    section whose length is 0 is left out.

    Args:
        path (Path): A file path for the file.
        program_start (int): Length passed to ``random_bytes`` for the
            "program" code at the start of the file.
        bloat (int): Number of bloat (null, '\\x00') bytes in the file.
        program_end (int): Length passed to ``random_bytes`` for the
            "program" code at the end of the file.

    Returns:
        None
    """
    # Build the complete payload before touching the file system, so a
    # failure while generating it does not create or truncate the target.
    sections: list[bytes] = []
    if program_start:
        sections.append(random_bytes(program_start))
    if bloat:
        sections.append(b"\x00" * bloat)
    if program_end:
        sections.append(random_bytes(program_end))
    payload = b"".join(sections)

    with open(path, "wb") as f:
        f.write(payload)

defensive_file_scanner.scan#

Malware #

Malware Object

Source code in defensive_file_scanner/scan.py
class Malware:
    """
    Malware Object
    """

    def __init__(self, file: PurePath | str):
        """
        Initialise a suspected Malware Object.

        Args:
            file (PurePath | str): Path of the file to scan.
        """
        self.file: PurePath = Path(file)
        self.counter: Counter = Counter()
        logger.info(f"Initiating Scan of file: {file}")
        logger.debug(repr(self))

    def __str__(self):
        return f"file='{self.file}'"

    def __repr__(self):
        return f"{type(self).__name__}({str(self)})"

    def test_whole_file(self) -> list[tuple[str, int]]:
        """
        Scans the whole file and counts the hexbits.

        Returns:
            The count of the hex in the byte code ordered from most
            to least common.

        Todo:
            - [ ] Add tqdm loading bar.
        """
        with open(self.file, "rb", buffering=1024) as f:
            while True:
                if not (line := f.read(100)):
                    break
                self.counter.update(line)
        logger.success(f"Most common bit: {self.counter.most_common(1)[0]}")
        return self.counter.most_common()

    @property
    def most_common(self) -> tuple[str, int]:
        """
        The most come hex bit and frequency.
        """
        if self.counter:
            return self.counter.most_common(1)[0]
        return self.test_whole_file()[0]

    @property
    def total_bits(self) -> int:
        """
        The total number if hex bits.
        """
        return self.counter.total()

    @property
    def ratio(self):
        """
        Ratio between most common count and total count.
        """
        return self.most_common[1] / self.total_bits

    @property
    def file_hash(self) -> str:
        raise NotImplementedError

    def sum_of_bit(self) -> int:
        raise NotImplementedError

most_common: tuple[str, int] property #

The most common hex bit and frequency.

ratio property #

Ratio between most common count and total count.

total_bits: int property #

The total number of hex bits.

__init__(file) #

Initialise a suspected Malware Object.

Parameters:

Name Type Description Default
file PurePath | str

Path of the file to scan.

required
Source code in defensive_file_scanner/scan.py
def __init__(self, file: PurePath | str):
    """
    Set up a suspected Malware object for the given file.

    Args:
        file (PurePath | str): Path of the file to scan.
    """
    # Start with an empty tally; it is filled in when the file is scanned.
    self.counter: Counter = Counter()
    # Normalise the argument so later code can rely on a path object.
    target = Path(file)
    self.file: PurePath = target
    logger.info(f"Initiating Scan of file: {file}")
    logger.debug(repr(self))

test_whole_file() #

Scans the whole file and counts the hexbits.

Returns:

Type Description
list[tuple[str, int]]

The count of the hex in the byte code ordered from most to least common.

Todo
  • Add tqdm loading bar.
Source code in defensive_file_scanner/scan.py
def test_whole_file(self) -> list[tuple[int, int]]:
    """
    Scan the whole file and count how often each byte value occurs.

    Counts from a previous scan are discarded first, so calling this
    method repeatedly does not inflate the totals.

    Returns:
        ``(byte value, count)`` pairs ordered from most to least
        common; an empty list if the file is empty.

    Todo:
        - [ ] Add tqdm loading bar.
    """
    chunk_size = 64 * 1024
    self.counter.clear()
    with open(self.file, "rb") as f:
        # Iterating a bytes chunk yields ints, so the counter is keyed
        # by byte value (0-255) rather than by string.
        while chunk := f.read(chunk_size):
            self.counter.update(chunk)
    if not self.counter:
        # Nothing was read: there is no "most common" entry to report.
        logger.warning(f"File is empty, nothing to count: {self.file}")
        return []
    logger.success(f"Most common bit: {self.counter.most_common(1)[0]}")
    return self.counter.most_common()

Last update: April 1, 2023
Created: April 1, 2023