# Copyright © The Debusine Developers
# See the AUTHORS file at the top-level directory of this distribution
#
# This file is part of Debusine. It is subject to the license terms
# in the LICENSE file found in the top-level directory of this
# distribution. No part of Debusine, including this file, may be copied,
# modified, propagated, or distributed except according to the terms
# contained in the LICENSE file.

"""View for DebDiff work request."""

import re
from collections import defaultdict
from enum import StrEnum
from typing import Any, TypedDict

from unidiff import PatchSet

from debusine.artifacts.models import (
    ArtifactCategory,
    DebianBinaryPackage,
    DebianUpload,
    TaskTypes,
    get_source_package_name,
    get_source_package_version,
)
from debusine.db.models import Artifact, ArtifactRelation, FileInArtifact
from debusine.tasks import DebDiff
from debusine.web.views.artifacts import ArtifactPlugin
from debusine.web.views.files import FileWidget
from debusine.web.views.work_request import WorkRequestPlugin


class OperationEnum(StrEnum):
    """Operations that are done in a file in the patch."""

    # Used when unidiff reports the patched file as added (is_added_file).
    ADDED = "added"
    # Used when unidiff reports the patched file as removed
    # (is_removed_file).
    REMOVED = "removed"
    # Used when unidiff reports the patched file as modified
    # (is_modified_file).
    MODIFIED = "modified"


class DebDiffFileSummary(TypedDict):
    """Summary of a file-level change in a source debdiff."""

    # Path of the patched file, as reported by unidiff.
    path: str
    # Line of the debdiff output that this entry points at. For text changes
    # it is the diff line number of the first line of the first hunk minus 3
    # (skipping back over the "@@" hunk header and the "---"/"+++" file
    # header); for binary changes it is the position (index + 1) of the first
    # patch-info line of the file in the debdiff output.
    diff_line_number: int
    # Whether the file was added, removed or modified.
    operation: OperationEnum


class DebDiffSection(StrEnum):
    """
    Sections of a debdiff output.

    The values double as the keys of the parsed-result dictionaries
    (DebDiffBinaryParseContext and DebDiffBinaryParsed).
    """

    # Starts at a "Files in second ... but not in first" header.
    ADDED_FILES = "added_files"
    # Starts at a "Files in first ... but not in second" header.
    REMOVED_FILES = "removed_files"
    # Starts at a "Control files: lines which differ" header.
    CONTROL_DIFFS = "control_diffs"
    # Starts at a "Control files of package <name>:" header.
    CONTROL_DIFFS_PACKAGES = "control_diffs_packages"
    # Starts at a "No differences were encountered between the control files
    # of package <name>" line.
    CONTROL_DIFFS_PACKAGES_NO_CHANGES = "control_diffs_packages_no_changes"


class DebDiffBinaryParseContext(TypedDict):
    """
    Defines the result structure of a parsed debdiff binary section.

    This is the intermediate form filled in while parsing, holding the
    collected lines of each section; keys match the DebDiffSection values.
    """

    # Lines collected under the "added files" section.
    added_files: list[str]
    # Lines collected under the "removed files" section.
    removed_files: list[str]
    # Lines collected under the global control files differences section.
    control_diffs: list[str]
    # Lines collected under each per-package control files section, keyed by
    # package name.
    control_diffs_packages: dict[str, list[str]]
    # Names of packages reported as having no control file differences.
    control_diffs_packages_no_changes: list[str]


class DebDiffBinaryParsed(TypedDict):
    """
    Defines the result structure as it's going to be used by the template.

    Built from a DebDiffBinaryParseContext by joining each list of lines
    into a single newline-separated string.
    """

    # Newline-joined lines of the "added files" section.
    added_files: str
    # Newline-joined lines of the "removed files" section.
    removed_files: str
    # Newline-joined lines of the global control files differences section.
    control_diffs: str
    # Newline-joined control file differences, keyed by package name.
    control_diffs_packages: dict[str, str]
    # Same content as control_diffs_packages, as a list of
    # (package, differences) pairs.
    # NOTE(review): presumably provided so templates can iterate over the
    # pairs directly; confirm against the template.
    control_diffs_packages_items: list[tuple[str, str]]
    # Names of packages reported as having no control file differences.
    control_diffs_packages_no_changes: list[str]


class DebDiffViewArtifactPlugin(ArtifactPlugin):
    """View for DebDiff Artifact."""

    artifact_category = ArtifactCategory.DEBDIFF
    template_name = "web/artifact-detail.html"
    name = "debdiff"

    object_name = "artifact"

    @classmethod
    def _summarise_debdiff_source(
        cls, debdiff: str
    ) -> list[DebDiffFileSummary]:
        patch = PatchSet(debdiff)

        summary: list[DebDiffFileSummary] = []

        lines = None
        last_binary_line = 0

        for file in patch:
            if file.is_added_file:
                operation = OperationEnum.ADDED
            elif file.is_removed_file:
                operation = OperationEnum.REMOVED
            elif file.is_modified_file:
                operation = OperationEnum.MODIFIED
            else:
                raise ValueError(f"Unexpected file status in diff: {file.path}")

            if file.is_binary_file:
                # find the line in the debdiff report of this binary change
                # Unidiff is not giving this information at the moment
                # See https://github.com/matiasb/python-unidiff/issues/122
                if lines is None:
                    # Avoid splitting for each binary file
                    lines = debdiff.splitlines()

                # Always true in practice, but the type system doesn't know
                # that.
                assert file.patch_info is not None

                diff_line_number = (
                    lines.index(file.patch_info[0].rstrip(), last_binary_line)
                    + 1
                )
                # keep searching from previous one
                last_binary_line = diff_line_number
                summary.append(
                    {
                        "path": file.path,
                        "diff_line_number": diff_line_number,
                        "operation": operation,
                    }
                )
            else:
                # Always true in practice, but the type system doesn't know
                # that.
                assert file[0][0].diff_line_no is not None

                # first diff line number of the first hunk of the file
                #
                # subtracts 3: skip back over the one-line hunk header (@@)
                # and the two-line file header (--- and +++)
                diff_line_number = file[0][0].diff_line_no - 3

                summary.append(
                    {
                        "path": file.path,
                        "diff_line_number": diff_line_number,
                        "operation": operation,
                    }
                )

        return summary

    @staticmethod
    def _detect_section_package(
        line: str,
    ) -> tuple[DebDiffSection | None, str | None]:
        """
        Tell whether a line starts a new debdiff section.

        :param line: a line of the debdiff output.
        :return: ``(section, package)``. ``section`` is None when the line
          is not a section header; ``package`` is only set for the
          per-package control files sections, whose header names a package.
        """
        added_headers = {
            "Files in second .changes but not in first",
            "Files in second .deb but not in first",
            "Files in second set of .debs but not in first",
        }
        removed_headers = {
            "Files in first .changes but not in second",
            "Files in first .deb but not in second",
            "Files in first set of .debs but not in second",
        }

        if line in added_headers:
            return DebDiffSection.ADDED_FILES, None

        if line in removed_headers:
            return DebDiffSection.REMOVED_FILES, None

        if line.startswith("Control files: lines which differ"):
            return DebDiffSection.CONTROL_DIFFS, None

        # Headers carrying a package name, tried in this order
        per_package_headers = (
            (
                r"^Control files of package (\S+):",
                DebDiffSection.CONTROL_DIFFS_PACKAGES,
            ),
            (
                r"^No differences were encountered between "
                r"the control files of package (\S+)$",
                DebDiffSection.CONTROL_DIFFS_PACKAGES_NO_CHANGES,
            ),
        )
        for pattern, section in per_package_headers:
            found = re.match(pattern, line)
            if found is not None:
                return section, found.group(1)

        return None, None

    @staticmethod
    def _append_to_result(
        result: DebDiffBinaryParseContext,
        section: DebDiffSection,
        package: str | None,
        line: str,
    ) -> None:
        """
        Store a line in the part of the result that belongs to a section.

        :param result: parse result being built; modified in place.
        :param section: section the line belongs to; its value selects the
          entry of ``result`` to update.
        :param package: package the line belongs to; required when the
          entry for the section is a dictionary keyed by package.
        :param line: line to store.
        :raises NotImplementedError: if the entry for the section is
          neither a list nor a dictionary.
        """
        target = result[section.value]

        if isinstance(target, list):
            target.append(line)
        elif isinstance(target, dict):
            # Per-package sections keep one list of lines for each package,
            # so a package must be known at this point
            assert package is not None

            target[package].append(line)
        else:
            raise NotImplementedError(f"{target!r} not implemented")

    @classmethod
    def _parse_debdiff_binary(cls, debdiff: str) -> DebDiffBinaryParsed:
        result: DebDiffBinaryParseContext = {
            DebDiffSection.ADDED_FILES.value: [],
            DebDiffSection.REMOVED_FILES.value: [],
            DebDiffSection.CONTROL_DIFFS.value: [],
            DebDiffSection.CONTROL_DIFFS_PACKAGES.value: defaultdict(list),
            DebDiffSection.CONTROL_DIFFS_PACKAGES_NO_CHANGES.value: [],
        }

        current_section: DebDiffSection | None = None
        current_package: str | None = None
        has_data = False

        for line in debdiff.splitlines():
            line = line.rstrip()

            if set(line) == {"-"}:
                # Line is a header underline
                continue

            if line == "":
                current_section = None
                current_package = None
                continue

            if (
                line
                == "No differences were encountered between the control files"
            ):
                has_data = True
                break

            section, package = cls._detect_section_package(line)

            has_data |= section is not None

            if section == DebDiffSection.CONTROL_DIFFS_PACKAGES_NO_CHANGES:
                assert package is not None
                result[
                    DebDiffSection.CONTROL_DIFFS_PACKAGES_NO_CHANGES.value
                ].append(package)

            if section is not None:
                current_section = section
                current_package = package
                continue

            if current_section is None and has_data:
                raise ValueError(f"Failed to parse line: {line!r}")

            if current_section is not None:
                cls._append_to_result(
                    result, current_section, current_package, line
                )

        if not has_data:
            raise ValueError("Cannot parse any information from debdiff output")

        return cls._render_binary_parsed(result)

    def get_context_data(self) -> dict[str, Any]:
        """Return the context."""
        slug = "debdiff"

        specialized_tab = {
            "specialized_tab": {
                "label": "DebDiff",
                "slug": slug,
                "template": "web/_debdiff-artifact-detail.html",
            }
        }

        try:
            debdiff_txt_file_in_artifact = self.artifact.fileinartifact_set.get(
                path=DebDiff.CAPTURE_OUTPUT_FILENAME
            )
        except FileInArtifact.DoesNotExist:
            return {
                **specialized_tab,
                "debdiff_artifact_error": "Artifact does not contain "
                f"{DebDiff.CAPTURE_OUTPUT_FILENAME} ",
            }

        debdiff_contents = self.read_file(debdiff_txt_file_in_artifact).decode(
            errors="replace"
        )
        debdiff_absolute_path = debdiff_txt_file_in_artifact.get_absolute_url()

        related_to = self.artifact.relations.filter(
            type=ArtifactRelation.Relations.RELATES_TO
        ).first()

        if related_to is None:
            return {
                **specialized_tab,
                "debdiff_artifact_error": "Artifact is not complete "
                "(missing 'relates-to' relation)",
            }

        if related_to.target.category == ArtifactCategory.SOURCE_PACKAGE:
            # The original and new artifacts are source artifacts
            debdiff_parser_error = None
            try:
                debdiff_source_summary = self._summarise_debdiff_source(
                    debdiff_contents
                )
            except ValueError as exc:
                debdiff_source_summary = None
                debdiff_parser_error = str(exc)

            debdiff_source_file_widget = FileWidget.create(
                debdiff_txt_file_in_artifact,
                file_tag=slug,
            )

            return {
                **specialized_tab,
                "debdiff_artifact_url": debdiff_absolute_path,
                "debdiff_source_summary": debdiff_source_summary,
                "debdiff_source_file_widget": debdiff_source_file_widget,
                "debdiff_parser_error": debdiff_parser_error,
                "debdiff_differences_reported": debdiff_source_summary
                is not None
                and len(debdiff_source_summary) != 0,
                "debdiff_file_tag": slug,
            }

        else:
            # The original and new artifacts were binary artifacts
            debdiff_parser_error = None
            try:
                debdiff_binary_parsed = self._parse_debdiff_binary(
                    debdiff_contents
                )

            except ValueError as exc:
                debdiff_binary_parsed = None
                debdiff_parser_error = str(exc)

            return {
                **specialized_tab,
                "debdiff_artifact_url": debdiff_absolute_path,
                "debdiff_binary_parsed": debdiff_binary_parsed,
                "debdiff_parser_error": debdiff_parser_error,
                "debdiff_differences_reported": (
                    debdiff_binary_parsed
                    and any(
                        debdiff_binary_parsed[section.value]
                        for section in DebDiffSection
                    )
                ),
            }

    @staticmethod
    def _render_binary_parsed(
        parsed: DebDiffBinaryParseContext,
    ) -> DebDiffBinaryParsed:
        """
        Turn the collected lines into the strings used by the template.

        :param parsed: sections as collected while parsing; not modified.
        :return: the same sections with each list of lines joined with
          newlines, plus the per-package differences as a list of pairs.
        """

        def as_text(lines: list[str]) -> str:
            """Join lines into a single newline-separated string."""
            return "\n".join(lines)

        per_package = {
            package: as_text(package_lines)
            for package, package_lines in parsed[
                "control_diffs_packages"
            ].items()
        }
        # Copy, so the result does not share the list with the input
        unchanged_packages = [*parsed["control_diffs_packages_no_changes"]]

        return {
            "added_files": as_text(parsed["added_files"]),
            "removed_files": as_text(parsed["removed_files"]),
            "control_diffs": as_text(parsed["control_diffs"]),
            "control_diffs_packages": per_package,
            "control_diffs_packages_items": [*per_package.items()],
            "control_diffs_packages_no_changes": unchanged_packages,
        }


class DebDiffViewWorkRequestPlugin(WorkRequestPlugin):
    """Work request view plugin for the ``debdiff`` worker task."""

    task_type = TaskTypes.WORKER
    task_name = "debdiff"

    @staticmethod
    def _source_package_label(artifact: Artifact) -> str:
        """Return ``<source name>_<source version>`` for a binary artifact."""
        artifact_data = artifact.create_data()

        assert isinstance(artifact_data, (DebianBinaryPackage, DebianUpload))

        name = get_source_package_name(artifact_data)
        version = get_source_package_version(artifact_data)
        return f"{name}_{version}"

    def do_get_description_data(self) -> dict[str, Any]:
        """
        Return data used for the description.

        Depending on the dynamic data of the task, the inputs are described
        by the ids of the two source artifacts, by a summary of the binary
        artifacts, or by the parameter summary alone. The environment id and
        the extra flags of the task are always included.
        """
        dynamic_data = self.task.dynamic_data
        assert dynamic_data is not None

        description: dict[str, Any] = {}

        source_ids = dynamic_data.input_source_artifacts_ids
        if source_ids is not None:
            description["source_artifact_original_id"] = source_ids[0]
            description["source_artifact_new_id"] = source_ids[1]
        else:
            binary_ids = dynamic_data.input_binary_artifacts_ids
            if binary_ids is None:
                # Dynamic data is quite incomplete (old WorkRequests):
                # fall back to the parameter summary
                description["parameter_summary"] = (
                    dynamic_data.parameter_summary
                )
            else:
                # Distinct source packages of the first group of artifacts
                originals = Artifact.objects.filter(id__in=binary_ids[0])
                labels = {
                    self._source_package_label(artifact)
                    for artifact in originals
                }

                description["source_package_names"] = sorted(labels)
                description["binary_artifacts_new_count"] = len(binary_ids[1])
                description["binary_artifacts_architecture"] = (
                    self.task.data.build_architecture
                )

        description["environment_id"] = dynamic_data.environment_id
        description["extra_arguments"] = self.task.data.extra_flags

        return description

    def get_context_data(self) -> dict[str, Any]:
        """Return the description template and the data it renders."""
        context: dict[str, Any] = {
            "description_template": "web/_debdiff-description.html",
        }
        context["description_data"] = self.get_description_data()
        return context
