|
591 | 591 | "outputs": [],
|
592 | 592 | "source": [
|
593 | 593 | "#!/usr/bin/env python3\n",
|
594 |
| - "import os\n", |
| 594 | + "\n", |
| 595 | + "\"\"\"\n", |
| 596 | + "A self-contained **pure-Python 3.9+** utility for applying human-readable\n", |
| 597 | + "“pseudo-diff” patch files to a collection of text files.\n", |
| 598 | + "\"\"\"\n", |
| 599 | + "\n", |
| 600 | + "from __future__ import annotations\n", |
| 601 | + "\n", |
| 602 | + "import pathlib\n", |
595 | 603 | "from dataclasses import dataclass, field\n",
|
596 | 604 | "from enum import Enum\n",
|
597 |
| - "from typing import Callable, Dict, List, NoReturn, Optional, Tuple, Union\n", |
| 605 | + "from typing import (\n", |
| 606 | + " Callable,\n", |
| 607 | + " Dict,\n", |
| 608 | + " List,\n", |
| 609 | + " Optional,\n", |
| 610 | + " Tuple,\n", |
| 611 | + " Union,\n", |
| 612 | + ")\n", |
598 | 613 | "\n",
|
599 | 614 | "\n",
|
| 615 | + "# --------------------------------------------------------------------------- #\n", |
| 616 | + "# Domain objects\n", |
| 617 | + "# --------------------------------------------------------------------------- #\n", |
600 | 618 | "class ActionType(str, Enum):\n",
|
601 | 619 | " ADD = \"add\"\n",
|
602 | 620 | " DELETE = \"delete\"\n",
|
|
616 | 634 | " changes: Dict[str, FileChange] = field(default_factory=dict)\n",
|
617 | 635 | "\n",
|
618 | 636 | "\n",
|
619 |
| - "def assemble_changes(\n", |
620 |
| - " orig: Dict[str, Optional[str]],\n", |
621 |
| - " updated_files: Dict[str, Optional[str]],\n", |
622 |
| - ") -> Commit:\n", |
623 |
| - " commit = Commit()\n", |
624 |
| - " for path, new_content in updated_files.items():\n", |
625 |
| - " old_content = orig.get(path)\n", |
626 |
| - " if old_content == new_content:\n", |
627 |
| - " continue\n", |
628 |
| - " if old_content is not None and new_content is not None:\n", |
629 |
| - " commit.changes[path] = FileChange(\n", |
630 |
| - " type=ActionType.UPDATE,\n", |
631 |
| - " old_content=old_content,\n", |
632 |
| - " new_content=new_content,\n", |
633 |
| - " )\n", |
634 |
| - " elif new_content is not None:\n", |
635 |
| - " commit.changes[path] = FileChange(\n", |
636 |
| - " type=ActionType.ADD,\n", |
637 |
| - " new_content=new_content,\n", |
638 |
| - " )\n", |
639 |
| - " elif old_content is not None:\n", |
640 |
| - " commit.changes[path] = FileChange(\n", |
641 |
| - " type=ActionType.DELETE,\n", |
642 |
| - " old_content=old_content,\n", |
643 |
| - " )\n", |
644 |
| - " else:\n", |
645 |
| - " assert False\n", |
646 |
| - " return commit\n", |
| 637 | + "# --------------------------------------------------------------------------- #\n", |
| 638 | + "# Exceptions\n", |
| 639 | + "# --------------------------------------------------------------------------- #\n", |
| 640 | + "class DiffError(ValueError):\n", |
| 641 | + " \"\"\"Any problem detected while parsing or applying a patch.\"\"\"\n", |
647 | 642 | "\n",
|
648 | 643 | "\n",
|
| 644 | + "# --------------------------------------------------------------------------- #\n", |
| 645 | + "# Helper dataclasses used while parsing patches\n", |
| 646 | + "# --------------------------------------------------------------------------- #\n", |
649 | 647 | "@dataclass\n",
|
650 | 648 | "class Chunk:\n",
|
651 |
| - " orig_index: int = -1 # line index of the first line in the original file\n", |
| 649 | + " orig_index: int = -1\n", |
652 | 650 | " del_lines: List[str] = field(default_factory=list)\n",
|
653 | 651 | " ins_lines: List[str] = field(default_factory=list)\n",
|
654 | 652 | "\n",
|
|
666 | 664 | " actions: Dict[str, PatchAction] = field(default_factory=dict)\n",
|
667 | 665 | "\n",
|
668 | 666 | "\n",
|
| 667 | + "# --------------------------------------------------------------------------- #\n", |
| 668 | + "# Patch text parser\n", |
| 669 | + "# --------------------------------------------------------------------------- #\n", |
669 | 670 | "@dataclass\n",
|
670 | 671 | "class Parser:\n",
|
671 |
| - " current_files: Dict[str, str] = field(default_factory=dict)\n", |
672 |
| - " lines: List[str] = field(default_factory=list)\n", |
| 672 | + " current_files: Dict[str, str]\n", |
| 673 | + " lines: List[str]\n", |
673 | 674 | " index: int = 0\n",
|
674 | 675 | " patch: Patch = field(default_factory=Patch)\n",
|
675 | 676 | " fuzz: int = 0\n",
|
676 | 677 | "\n",
|
| 678 | + " # ------------- low-level helpers -------------------------------------- #\n", |
| 679 | + " def _cur_line(self) -> str:\n", |
| 680 | + " if self.index >= len(self.lines):\n", |
| 681 | + " raise DiffError(\"Unexpected end of input while parsing patch\")\n", |
| 682 | + " return self.lines[self.index]\n", |
| 683 | + "\n", |
| 684 | + " @staticmethod\n", |
| 685 | + " def _norm(line: str) -> str:\n", |
| 686 | + " \"\"\"Strip CR so comparisons work for both LF and CRLF input.\"\"\"\n", |
| 687 | + " return line.rstrip(\"\\r\")\n", |
| 688 | + "\n", |
| 689 | + " # ------------- scanning convenience ----------------------------------- #\n", |
677 | 690 | " def is_done(self, prefixes: Optional[Tuple[str, ...]] = None) -> bool:\n",
|
678 | 691 | " if self.index >= len(self.lines):\n",
|
679 | 692 | " return True\n",
|
680 |
| - " if prefixes and self.lines[self.index].startswith(prefixes):\n", |
| 693 | + " if (\n", |
| 694 | + " prefixes\n", |
| 695 | + " and len(prefixes) > 0\n", |
| 696 | + " and self._norm(self._cur_line()).startswith(prefixes)\n", |
| 697 | + " ):\n", |
681 | 698 | " return True\n",
|
682 | 699 | " return False\n",
|
683 | 700 | "\n",
|
684 | 701 | " def startswith(self, prefix: Union[str, Tuple[str, ...]]) -> bool:\n",
|
685 |
| - " assert self.index < len(self.lines), f\"Index: {self.index} >= {len(self.lines)}\"\n", |
686 |
| - " if self.lines[self.index].startswith(prefix):\n", |
687 |
| - " return True\n", |
688 |
| - " return False\n", |
689 |
| - "\n", |
690 |
| - " def read_str(self, prefix: str = \"\", return_everything: bool = False) -> str:\n", |
691 |
| - " assert self.index < len(self.lines), f\"Index: {self.index} >= {len(self.lines)}\"\n", |
692 |
| - " if self.lines[self.index].startswith(prefix):\n", |
693 |
| - " if return_everything:\n", |
694 |
| - " text = self.lines[self.index]\n", |
695 |
| - " else:\n", |
696 |
| - " text = self.lines[self.index][len(prefix) :]\n", |
| 702 | + " return self._norm(self._cur_line()).startswith(prefix)\n", |
| 703 | + "\n", |
| 704 | + " def read_str(self, prefix: str) -> str:\n", |
| 705 | + " \"\"\"\n", |
| 706 | + " Consume the current line if it starts with *prefix* and return the text\n", |
| 707 | + " **after** the prefix. Raises if prefix is empty.\n", |
| 708 | + " \"\"\"\n", |
| 709 | + " if prefix == \"\":\n", |
| 710 | + " raise ValueError(\"read_str() requires a non-empty prefix\")\n", |
| 711 | + " if self._norm(self._cur_line()).startswith(prefix):\n", |
| 712 | + " text = self._cur_line()[len(prefix) :]\n", |
697 | 713 | " self.index += 1\n",
|
698 | 714 | " return text\n",
|
699 | 715 | " return \"\"\n",
|
700 | 716 | "\n",
|
701 |
| - " def parse(self) -> NoReturn:\n", |
| 717 | + " def read_line(self) -> str:\n", |
| 718 | + " \"\"\"Return the current raw line and advance.\"\"\"\n", |
| 719 | + " line = self._cur_line()\n", |
| 720 | + " self.index += 1\n", |
| 721 | + " return line\n", |
| 722 | + "\n", |
| 723 | + " # ------------- public entry point -------------------------------------- #\n", |
| 724 | + " def parse(self) -> None:\n", |
702 | 725 | " while not self.is_done((\"*** End Patch\",)):\n",
|
| 726 | + " # ---------- UPDATE ---------- #\n", |
703 | 727 | " path = self.read_str(\"*** Update File: \")\n",
|
704 | 728 | " if path:\n",
|
705 | 729 | " if path in self.patch.actions:\n",
|
706 |
| - " raise DiffError(f\"Update File Error: Duplicate Path: {path}\")\n", |
| 730 | + " raise DiffError(f\"Duplicate update for file: {path}\")\n", |
707 | 731 | " move_to = self.read_str(\"*** Move to: \")\n",
|
708 | 732 | " if path not in self.current_files:\n",
|
709 |
| - " raise DiffError(f\"Update File Error: Missing File: {path}\")\n", |
| 733 | + " raise DiffError(f\"Update File Error - missing file: {path}\")\n", |
710 | 734 | " text = self.current_files[path]\n",
|
711 |
| - " action = self.parse_update_file(text)\n", |
712 |
| - " action.move_path = move_to\n", |
| 735 | + " action = self._parse_update_file(text)\n", |
| 736 | + " action.move_path = move_to or None\n", |
713 | 737 | " self.patch.actions[path] = action\n",
|
714 | 738 | " continue\n",
|
| 739 | + "\n", |
| 740 | + " # ---------- DELETE ---------- #\n", |
715 | 741 | " path = self.read_str(\"*** Delete File: \")\n",
|
716 | 742 | " if path:\n",
|
717 | 743 | " if path in self.patch.actions:\n",
|
718 |
| - " raise DiffError(f\"Delete File Error: Duplicate Path: {path}\")\n", |
| 744 | + " raise DiffError(f\"Duplicate delete for file: {path}\")\n", |
719 | 745 | " if path not in self.current_files:\n",
|
720 |
| - " raise DiffError(f\"Delete File Error: Missing File: {path}\")\n", |
721 |
| - " self.patch.actions[path] = PatchAction(\n", |
722 |
| - " type=ActionType.DELETE,\n", |
723 |
| - " )\n", |
| 746 | + " raise DiffError(f\"Delete File Error - missing file: {path}\")\n", |
| 747 | + " self.patch.actions[path] = PatchAction(type=ActionType.DELETE)\n", |
724 | 748 | " continue\n",
|
| 749 | + "\n", |
| 750 | + " # ---------- ADD ---------- #\n", |
725 | 751 | " path = self.read_str(\"*** Add File: \")\n",
|
726 | 752 | " if path:\n",
|
727 | 753 | " if path in self.patch.actions:\n",
|
728 |
| - " raise DiffError(f\"Add File Error: Duplicate Path: {path}\")\n", |
| 754 | + " raise DiffError(f\"Duplicate add for file: {path}\")\n", |
729 | 755 | " if path in self.current_files:\n",
|
730 |
| - " raise DiffError(f\"Add File Error: File already exists: {path}\")\n", |
731 |
| - " self.patch.actions[path] = self.parse_add_file()\n", |
| 756 | + " raise DiffError(f\"Add File Error - file already exists: {path}\")\n", |
| 757 | + " self.patch.actions[path] = self._parse_add_file()\n", |
732 | 758 | " continue\n",
|
733 |
| - " raise DiffError(f\"Unknown Line: {self.lines[self.index]}\")\n", |
| 759 | + "\n", |
| 760 | + " raise DiffError(f\"Unknown line while parsing: {self._cur_line()}\")\n", |
| 761 | + "\n", |
734 | 762 | " if not self.startswith(\"*** End Patch\"):\n",
|
735 |
| - " raise DiffError(\"Missing End Patch\")\n", |
736 |
| - " self.index += 1\n", |
| 763 | + " raise DiffError(\"Missing *** End Patch sentinel\")\n", |
| 764 | + " self.index += 1 # consume sentinel\n", |
737 | 765 | "\n",
|
738 |
| - " def parse_update_file(self, text: str) -> PatchAction:\n", |
739 |
| - " action = PatchAction(\n", |
740 |
| - " type=ActionType.UPDATE,\n", |
741 |
| - " )\n", |
| 766 | + " # ------------- section parsers ---------------------------------------- #\n", |
| 767 | + " def _parse_update_file(self, text: str) -> PatchAction:\n", |
| 768 | + " action = PatchAction(type=ActionType.UPDATE)\n", |
742 | 769 | " lines = text.split(\"\\n\")\n",
|
743 | 770 | " index = 0\n",
|
744 | 771 | " while not self.is_done(\n",
|
|
752 | 779 | " ):\n",
|
753 | 780 | " def_str = self.read_str(\"@@ \")\n",
|
754 | 781 | " section_str = \"\"\n",
|
755 |
| - " if not def_str:\n", |
756 |
| - " if self.lines[self.index] == \"@@\":\n", |
757 |
| - " section_str = self.lines[self.index]\n", |
758 |
| - " self.index += 1\n", |
| 782 | + " if not def_str and self._norm(self._cur_line()) == \"@@\":\n", |
| 783 | + " section_str = self.read_line()\n", |
| 784 | + "\n", |
759 | 785 | " if not (def_str or section_str or index == 0):\n",
|
760 |
| - " raise DiffError(f\"Invalid Line:\\n{self.lines[self.index]}\")\n", |
| 786 | + " raise DiffError(f\"Invalid line in update section:\\n{self._cur_line()}\")\n", |
| 787 | + "\n", |
761 | 788 | " if def_str.strip():\n",
|
762 | 789 | " found = False\n",
|
763 |
| - " if not [s for s in lines[:index] if s == def_str]:\n", |
764 |
| - " # def str is a skip ahead operator\n", |
| 790 | + " if def_str not in lines[:index]:\n", |
765 | 791 | " for i, s in enumerate(lines[index:], index):\n",
|
766 | 792 | " if s == def_str:\n",
|
767 | 793 | " index = i + 1\n",
|
768 | 794 | " found = True\n",
|
769 | 795 | " break\n",
|
770 |
| - " if not found and not [s for s in lines[:index] if s.strip() == def_str.strip()]:\n", |
771 |
| - " # def str is a skip ahead operator\n", |
| 796 | + " if not found and def_str.strip() not in [\n", |
| 797 | + " s.strip() for s in lines[:index]\n", |
| 798 | + " ]:\n", |
772 | 799 | " for i, s in enumerate(lines[index:], index):\n",
|
773 | 800 | " if s.strip() == def_str.strip():\n",
|
774 | 801 | " index = i + 1\n",
|
775 | 802 | " self.fuzz += 1\n",
|
776 | 803 | " found = True\n",
|
777 | 804 | " break\n",
|
778 |
| - " next_chunk_context, chunks, end_patch_index, eof = peek_next_section(\n", |
779 |
| - " self.lines, self.index\n", |
780 |
| - " )\n", |
781 |
| - " next_chunk_text = \"\\n\".join(next_chunk_context)\n", |
782 |
| - " new_index, fuzz = find_context(lines, next_chunk_context, index, eof)\n", |
| 805 | + "\n", |
| 806 | + " next_ctx, chunks, end_idx, eof = peek_next_section(self.lines, self.index)\n", |
| 807 | + " new_index, fuzz = find_context(lines, next_ctx, index, eof)\n", |
783 | 808 | " if new_index == -1:\n",
|
784 |
| - " if eof:\n", |
785 |
| - " raise DiffError(f\"Invalid EOF Context {index}:\\n{next_chunk_text}\")\n", |
786 |
| - " else:\n", |
787 |
| - " raise DiffError(f\"Invalid Context {index}:\\n{next_chunk_text}\")\n", |
| 809 | + " ctx_txt = \"\\n\".join(next_ctx)\n", |
| 810 | + " raise DiffError(\n", |
| 811 | + " f\"Invalid {'EOF ' if eof else ''}context at {index}:\\n{ctx_txt}\"\n", |
| 812 | + " )\n", |
788 | 813 | " self.fuzz += fuzz\n",
|
789 | 814 | " for ch in chunks:\n",
|
790 | 815 | " ch.orig_index += new_index\n",
|
791 | 816 | " action.chunks.append(ch)\n",
|
792 |
| - " index = new_index + len(next_chunk_context)\n", |
793 |
| - " self.index = end_patch_index\n", |
794 |
| - " continue\n", |
| 817 | + " index = new_index + len(next_ctx)\n", |
| 818 | + " self.index = end_idx\n", |
795 | 819 | " return action\n",
|
796 | 820 | "\n",
|
797 |
| - " def parse_add_file(self) -> PatchAction:\n", |
798 |
| - " lines = []\n", |
| 821 | + " def _parse_add_file(self) -> PatchAction:\n", |
| 822 | + " lines: List[str] = []\n", |
799 | 823 | " while not self.is_done(\n",
|
800 | 824 | " (\"*** End Patch\", \"*** Update File:\", \"*** Delete File:\", \"*** Add File:\")\n",
|
801 | 825 | " ):\n",
|
802 |
| - " s = self.read_str()\n", |
| 826 | + " s = self.read_line()\n", |
803 | 827 | " if not s.startswith(\"+\"):\n",
|
804 |
| - " raise DiffError(f\"Invalid Add File Line: {s}\")\n", |
805 |
| - " s = s[1:]\n", |
806 |
| - " lines.append(s)\n", |
807 |
| - " return PatchAction(\n", |
808 |
| - " type=ActionType.ADD,\n", |
809 |
| - " new_file=\"\\n\".join(lines),\n", |
810 |
| - " )\n", |
| 828 | + " raise DiffError(f\"Invalid Add File line (missing '+'): {s}\")\n", |
| 829 | + " lines.append(s[1:]) # strip leading '+'\n", |
| 830 | + " return PatchAction(type=ActionType.ADD, new_file=\"\\n\".join(lines))\n", |
811 | 831 | "\n",
|
812 | 832 | "\n",
|
813 |
| - "def find_context_core(lines: List[str], context: List[str], start: int) -> Tuple[int, int]:\n", |
| 833 | + "# --------------------------------------------------------------------------- #\n", |
| 834 | + "# Helper functions\n", |
| 835 | + "# --------------------------------------------------------------------------- #\n", |
| 836 | + "def find_context_core(\n", |
| 837 | + " lines: List[str], context: List[str], start: int\n", |
| 838 | + ") -> Tuple[int, int]:\n", |
814 | 839 | " if not context:\n",
|
815 | 840 | " return start, 0\n",
|
816 | 841 | "\n",
|
817 |
| - " # Prefer identical\n", |
818 | 842 | " for i in range(start, len(lines)):\n",
|
819 | 843 | " if lines[i : i + len(context)] == context:\n",
|
820 | 844 | " return i, 0\n",
|
821 |
| - " # RStrip is ok\n", |
822 | 845 | " for i in range(start, len(lines)):\n",
|
823 |
| - " if [s.rstrip() for s in lines[i : i + len(context)]] == [s.rstrip() for s in context]:\n", |
| 846 | + " if [s.rstrip() for s in lines[i : i + len(context)]] == [\n", |
| 847 | + " s.rstrip() for s in context\n", |
| 848 | + " ]:\n", |
824 | 849 | " return i, 1\n",
|
825 |
| - " # Fine, Strip is ok too.\n", |
826 | 850 | " for i in range(start, len(lines)):\n",
|
827 |
| - " if [s.strip() for s in lines[i : i + len(context)]] == [s.strip() for s in context]:\n", |
| 851 | + " if [s.strip() for s in lines[i : i + len(context)]] == [\n", |
| 852 | + " s.strip() for s in context\n", |
| 853 | + " ]:\n", |
828 | 854 | " return i, 100\n",
|
829 | 855 | " return -1, 0\n",
|
830 | 856 | "\n",
|
831 | 857 | "\n",
|
832 |
| - "def find_context(lines: List[str], context: List[str], start: int, eof: bool) -> Tuple[int, int]:\n", |
| 858 | + "def find_context(\n", |
| 859 | + " lines: List[str], context: List[str], start: int, eof: bool\n", |
| 860 | + ") -> Tuple[int, int]:\n", |
833 | 861 | " if eof:\n",
|
834 | 862 | " new_index, fuzz = find_context_core(lines, context, len(lines) - len(context))\n",
|
835 | 863 | " if new_index != -1:\n",
|
836 | 864 | " return new_index, fuzz\n",
|
837 | 865 | " new_index, fuzz = find_context_core(lines, context, start)\n",
|
838 |
| - " return new_index, fuzz + 10000\n", |
| 866 | + " return new_index, fuzz + 10_000\n", |
839 | 867 | " return find_context_core(lines, context, start)\n",
|
840 | 868 | "\n",
|
841 | 869 | "\n",
|
842 |
| - "def peek_next_section(lines: List[str], index: int) -> Tuple[List[str], List[Chunk], int, bool]:\n", |
| 870 | + "def peek_next_section(\n", |
| 871 | + " lines: List[str], index: int\n", |
| 872 | + ") -> Tuple[List[str], List[Chunk], int, bool]:\n", |
843 | 873 | " old: List[str] = []\n",
|
844 | 874 | " del_lines: List[str] = []\n",
|
845 | 875 | " ins_lines: List[str] = []\n",
|
846 | 876 | " chunks: List[Chunk] = []\n",
|
847 | 877 | " mode = \"keep\"\n",
|
848 | 878 | " orig_index = index\n",
|
| 879 | + "\n", |
849 | 880 | " while index < len(lines):\n",
|
850 | 881 | " s = lines[index]\n",
|
851 | 882 | " if s.startswith(\n",
|
|
861 | 892 | " break\n",
|
862 | 893 | " if s == \"***\":\n",
|
863 | 894 | " break\n",
|
864 |
| - " elif s.startswith(\"***\"):\n", |
| 895 | + " if s.startswith(\"***\"):\n", |
865 | 896 | " raise DiffError(f\"Invalid Line: {s}\")\n",
|
866 | 897 | " index += 1\n",
|
| 898 | + "\n", |
867 | 899 | " last_mode = mode\n",
|
868 | 900 | " if s == \"\":\n",
|
869 | 901 | " s = \" \"\n",
|
|
876 | 908 | " else:\n",
|
877 | 909 | " raise DiffError(f\"Invalid Line: {s}\")\n",
|
878 | 910 | " s = s[1:]\n",
|
| 911 | + "\n", |
879 | 912 | " if mode == \"keep\" and last_mode != mode:\n",
|
880 | 913 | " if ins_lines or del_lines:\n",
|
881 | 914 | " chunks.append(\n",
|
|
885 | 918 | " ins_lines=ins_lines,\n",
|
886 | 919 | " )\n",
|
887 | 920 | " )\n",
|
888 |
| - " del_lines = []\n", |
889 |
| - " ins_lines = []\n", |
| 921 | + " del_lines, ins_lines = [], []\n", |
| 922 | + "\n", |
890 | 923 | " if mode == \"delete\":\n",
|
891 | 924 | " del_lines.append(s)\n",
|
892 | 925 | " old.append(s)\n",
|
893 | 926 | " elif mode == \"add\":\n",
|
894 | 927 | " ins_lines.append(s)\n",
|
895 | 928 | " elif mode == \"keep\":\n",
|
896 | 929 | " old.append(s)\n",
|
| 930 | + "\n", |
897 | 931 | " if ins_lines or del_lines:\n",
|
898 | 932 | " chunks.append(\n",
|
899 | 933 | " Chunk(\n",
|
|
902 | 936 | " ins_lines=ins_lines,\n",
|
903 | 937 | " )\n",
|
904 | 938 | " )\n",
|
905 |
| - " del_lines = []\n", |
906 |
| - " ins_lines = []\n", |
| 939 | + "\n", |
907 | 940 | " if index < len(lines) and lines[index] == \"*** End of File\":\n",
|
908 | 941 | " index += 1\n",
|
909 | 942 | " return old, chunks, index, True\n",
|
| 943 | + "\n", |
910 | 944 | " if index == orig_index:\n",
|
911 |
| - " raise DiffError(f\"Nothing in this section - {index=} {lines[index]}\")\n", |
| 945 | + " raise DiffError(\"Nothing in this section\")\n", |
912 | 946 | " return old, chunks, index, False\n",
|
913 | 947 | "\n",
|
914 | 948 | "\n",
|
915 |
| - "def text_to_patch(text: str, orig: Dict[str, str]) -> Tuple[Patch, int]:\n", |
916 |
| - " lines = text.strip().split(\"\\n\")\n", |
917 |
| - " if len(lines) < 2 or not lines[0].startswith(\"*** Begin Patch\") or lines[-1] != \"*** End Patch\":\n", |
918 |
| - " raise DiffError(\"Invalid patch text\")\n", |
919 |
| - "\n", |
920 |
| - " parser = Parser(\n", |
921 |
| - " current_files=orig,\n", |
922 |
| - " lines=lines,\n", |
923 |
| - " index=1,\n", |
924 |
| - " )\n", |
925 |
| - " parser.parse()\n", |
926 |
| - " return parser.patch, parser.fuzz\n", |
927 |
| - "\n", |
928 |
| - "\n", |
929 |
| - "def identify_files_needed(text: str) -> List[str]:\n", |
930 |
| - " lines = text.strip().split(\"\\n\")\n", |
931 |
| - " result = set()\n", |
932 |
| - " for line in lines:\n", |
933 |
| - " if line.startswith(\"*** Update File: \"):\n", |
934 |
| - " result.add(line[len(\"*** Update File: \") :])\n", |
935 |
| - " if line.startswith(\"*** Delete File: \"):\n", |
936 |
| - " result.add(line[len(\"*** Delete File: \") :])\n", |
937 |
| - " return list(result)\n", |
938 |
| - "\n", |
939 |
| - "\n", |
940 |
| - "def identify_files_added(text: str) -> List[str]:\n", |
941 |
| - " lines = text.strip().split(\"\\n\")\n", |
942 |
| - " result = set()\n", |
943 |
| - " for line in lines:\n", |
944 |
| - " if line.startswith(\"*** Add File: \"):\n", |
945 |
| - " result.add(line[len(\"*** Add File: \") :])\n", |
946 |
| - " return list(result)\n", |
947 |
| - "\n", |
948 |
| - "\n", |
| 949 | + "# --------------------------------------------------------------------------- #\n", |
| 950 | + "# Patch → Commit and Commit application\n", |
| 951 | + "# --------------------------------------------------------------------------- #\n", |
949 | 952 | "def _get_updated_file(text: str, action: PatchAction, path: str) -> str:\n",
|
950 |
| - " assert action.type == ActionType.UPDATE\n", |
| 953 | + " if action.type is not ActionType.UPDATE:\n", |
| 954 | + " raise DiffError(\"_get_updated_file called with non-update action\")\n", |
951 | 955 | " orig_lines = text.split(\"\\n\")\n",
|
952 |
| - " dest_lines = []\n", |
| 956 | + " dest_lines: List[str] = []\n", |
953 | 957 | " orig_index = 0\n",
|
954 |
| - " dest_index = 0\n", |
| 958 | + "\n", |
955 | 959 | " for chunk in action.chunks:\n",
|
956 |
| - " # Process the unchanged lines before the chunk\n", |
957 | 960 | " if chunk.orig_index > len(orig_lines):\n",
|
958 | 961 | " raise DiffError(\n",
|
959 |
| - " f\"_get_updated_file: {path}: chunk.orig_index {chunk.orig_index} > len(lines) {len(orig_lines)}\"\n", |
| 962 | + " f\"{path}: chunk.orig_index {chunk.orig_index} exceeds file length\"\n", |
960 | 963 | " )\n",
|
961 | 964 | " if orig_index > chunk.orig_index:\n",
|
962 | 965 | " raise DiffError(\n",
|
963 |
| - " f\"_get_updated_file: {path}: orig_index {orig_index} > chunk.orig_index {chunk.orig_index}\"\n", |
| 966 | + " f\"{path}: overlapping chunks at {orig_index} > {chunk.orig_index}\"\n", |
964 | 967 | " )\n",
|
965 |
| - " assert orig_index <= chunk.orig_index\n", |
| 968 | + "\n", |
966 | 969 | " dest_lines.extend(orig_lines[orig_index : chunk.orig_index])\n",
|
967 |
| - " delta = chunk.orig_index - orig_index\n", |
968 |
| - " orig_index += delta\n", |
969 |
| - " dest_index += delta\n", |
970 |
| - " # Process the inserted lines\n", |
971 |
| - " if chunk.ins_lines:\n", |
972 |
| - " for i in range(len(chunk.ins_lines)):\n", |
973 |
| - " dest_lines.append(chunk.ins_lines[i])\n", |
974 |
| - " dest_index += len(chunk.ins_lines)\n", |
| 970 | + " orig_index = chunk.orig_index\n", |
| 971 | + "\n", |
| 972 | + " dest_lines.extend(chunk.ins_lines)\n", |
975 | 973 | " orig_index += len(chunk.del_lines)\n",
|
976 |
| - " # Final part\n", |
| 974 | + "\n", |
977 | 975 | " dest_lines.extend(orig_lines[orig_index:])\n",
|
978 |
| - " delta = len(orig_lines) - orig_index\n", |
979 |
| - " orig_index += delta\n", |
980 |
| - " dest_index += delta\n", |
981 |
| - " assert orig_index == len(orig_lines)\n", |
982 |
| - " assert dest_index == len(dest_lines)\n", |
983 | 976 | " return \"\\n\".join(dest_lines)\n",
|
984 | 977 | "\n",
|
985 | 978 | "\n",
|
986 | 979 | "def patch_to_commit(patch: Patch, orig: Dict[str, str]) -> Commit:\n",
|
987 | 980 | " commit = Commit()\n",
|
988 | 981 | " for path, action in patch.actions.items():\n",
|
989 |
| - " if action.type == ActionType.DELETE:\n", |
990 |
| - " commit.changes[path] = FileChange(type=ActionType.DELETE, old_content=orig[path])\n", |
991 |
| - " elif action.type == ActionType.ADD:\n", |
992 |
| - " commit.changes[path] = FileChange(type=ActionType.ADD, new_content=action.new_file)\n", |
993 |
| - " elif action.type == ActionType.UPDATE:\n", |
994 |
| - " new_content = _get_updated_file(text=orig[path], action=action, path=path)\n", |
| 982 | + " if action.type is ActionType.DELETE:\n", |
| 983 | + " commit.changes[path] = FileChange(\n", |
| 984 | + " type=ActionType.DELETE, old_content=orig[path]\n", |
| 985 | + " )\n", |
| 986 | + " elif action.type is ActionType.ADD:\n", |
| 987 | + " if action.new_file is None:\n", |
| 988 | + " raise DiffError(\"ADD action without file content\")\n", |
| 989 | + " commit.changes[path] = FileChange(\n", |
| 990 | + " type=ActionType.ADD, new_content=action.new_file\n", |
| 991 | + " )\n", |
| 992 | + " elif action.type is ActionType.UPDATE:\n", |
| 993 | + " new_content = _get_updated_file(orig[path], action, path)\n", |
995 | 994 | " commit.changes[path] = FileChange(\n",
|
996 | 995 | " type=ActionType.UPDATE,\n",
|
997 | 996 | " old_content=orig[path],\n",
|
|
1001 | 1000 | " return commit\n",
|
1002 | 1001 | "\n",
|
1003 | 1002 | "\n",
|
1004 |
| - "class DiffError(ValueError):\n", |
1005 |
| - " pass\n", |
| 1003 | + "# --------------------------------------------------------------------------- #\n", |
| 1004 | + "# User-facing helpers\n", |
| 1005 | + "# --------------------------------------------------------------------------- #\n", |
| 1006 | + "def text_to_patch(text: str, orig: Dict[str, str]) -> Tuple[Patch, int]:\n", |
| 1007 | + " lines = text.splitlines() # preserves blank lines, no strip()\n", |
| 1008 | + " if (\n", |
| 1009 | + " len(lines) < 2\n", |
| 1010 | + " or not Parser._norm(lines[0]).startswith(\"*** Begin Patch\")\n", |
| 1011 | + " or Parser._norm(lines[-1]) != \"*** End Patch\"\n", |
| 1012 | + " ):\n", |
| 1013 | + " raise DiffError(\"Invalid patch text - missing sentinels\")\n", |
| 1014 | + "\n", |
| 1015 | + " parser = Parser(current_files=orig, lines=lines, index=1)\n", |
| 1016 | + " parser.parse()\n", |
| 1017 | + " return parser.patch, parser.fuzz\n", |
1006 | 1018 | "\n",
|
1007 | 1019 | "\n",
|
1008 |
| - "def load_files(paths: List[str], open_fn: Callable) -> Dict[str, str]:\n", |
1009 |
| - " orig = {}\n", |
1010 |
| - " for path in paths:\n", |
1011 |
| - " orig[path] = open_fn(path)\n", |
1012 |
| - " return orig\n", |
| 1020 | + "def identify_files_needed(text: str) -> List[str]:\n", |
| 1021 | + " lines = text.splitlines()\n", |
| 1022 | + " return [\n", |
| 1023 | + " line[len(\"*** Update File: \") :]\n", |
| 1024 | + " for line in lines\n", |
| 1025 | + " if line.startswith(\"*** Update File: \")\n", |
| 1026 | + " ] + [\n", |
| 1027 | + " line[len(\"*** Delete File: \") :]\n", |
| 1028 | + " for line in lines\n", |
| 1029 | + " if line.startswith(\"*** Delete File: \")\n", |
| 1030 | + " ]\n", |
1013 | 1031 | "\n",
|
1014 | 1032 | "\n",
|
1015 |
| - "def apply_commit(commit: Commit, write_fn: Callable, remove_fn: Callable) -> None:\n", |
| 1033 | + "def identify_files_added(text: str) -> List[str]:\n", |
| 1034 | + " lines = text.splitlines()\n", |
| 1035 | + " return [\n", |
| 1036 | + " line[len(\"*** Add File: \") :]\n", |
| 1037 | + " for line in lines\n", |
| 1038 | + " if line.startswith(\"*** Add File: \")\n", |
| 1039 | + " ]\n", |
| 1040 | + "\n", |
| 1041 | + "\n", |
| 1042 | + "# --------------------------------------------------------------------------- #\n", |
| 1043 | + "# File-system helpers\n", |
| 1044 | + "# --------------------------------------------------------------------------- #\n", |
| 1045 | + "def load_files(paths: List[str], open_fn: Callable[[str], str]) -> Dict[str, str]:\n", |
| 1046 | + " return {path: open_fn(path) for path in paths}\n", |
| 1047 | + "\n", |
| 1048 | + "\n", |
| 1049 | + "def apply_commit(\n", |
| 1050 | + " commit: Commit,\n", |
| 1051 | + " write_fn: Callable[[str, str], None],\n", |
| 1052 | + " remove_fn: Callable[[str], None],\n", |
| 1053 | + ") -> None:\n", |
1016 | 1054 | " for path, change in commit.changes.items():\n",
|
1017 |
| - " if change.type == ActionType.DELETE:\n", |
| 1055 | + " if change.type is ActionType.DELETE:\n", |
1018 | 1056 | " remove_fn(path)\n",
|
1019 |
| - " elif change.type == ActionType.ADD:\n", |
| 1057 | + " elif change.type is ActionType.ADD:\n", |
| 1058 | + " if change.new_content is None:\n", |
| 1059 | + " raise DiffError(f\"ADD change for {path} has no content\")\n", |
1020 | 1060 | " write_fn(path, change.new_content)\n",
|
1021 |
| - " elif change.type == ActionType.UPDATE:\n", |
| 1061 | + " elif change.type is ActionType.UPDATE:\n", |
| 1062 | + " if change.new_content is None:\n", |
| 1063 | + " raise DiffError(f\"UPDATE change for {path} has no new content\")\n", |
| 1064 | + " target = change.move_path or path\n", |
| 1065 | + " write_fn(target, change.new_content)\n", |
1022 | 1066 | " if change.move_path:\n",
|
1023 |
| - " write_fn(change.move_path, change.new_content)\n", |
1024 | 1067 | " remove_fn(path)\n",
|
1025 |
| - " else:\n", |
1026 |
| - " write_fn(path, change.new_content)\n", |
1027 | 1068 | "\n",
|
1028 | 1069 | "\n",
|
1029 |
| - "def process_patch(text: str, open_fn: Callable, write_fn: Callable, remove_fn: Callable) -> str:\n", |
1030 |
| - " assert text.startswith(\"*** Begin Patch\")\n", |
| 1070 | + "def process_patch(\n", |
| 1071 | + " text: str,\n", |
| 1072 | + " open_fn: Callable[[str], str],\n", |
| 1073 | + " write_fn: Callable[[str, str], None],\n", |
| 1074 | + " remove_fn: Callable[[str], None],\n", |
| 1075 | + ") -> str:\n", |
| 1076 | + " if not text.startswith(\"*** Begin Patch\"):\n", |
| 1077 | + " raise DiffError(\"Patch text must start with *** Begin Patch\")\n", |
1031 | 1078 | " paths = identify_files_needed(text)\n",
|
1032 | 1079 | " orig = load_files(paths, open_fn)\n",
|
1033 |
| - " patch, fuzz = text_to_patch(text, orig)\n", |
| 1080 | + " patch, _fuzz = text_to_patch(text, orig)\n", |
1034 | 1081 | " commit = patch_to_commit(patch, orig)\n",
|
1035 | 1082 | " apply_commit(commit, write_fn, remove_fn)\n",
|
1036 | 1083 | " return \"Done!\"\n",
|
1037 | 1084 | "\n",
|
1038 | 1085 | "\n",
|
| 1086 | + "# --------------------------------------------------------------------------- #\n", |
| 1087 | + "# Default FS helpers\n", |
| 1088 | + "# --------------------------------------------------------------------------- #\n", |
1039 | 1089 | "def open_file(path: str) -> str:\n",
|
1040 |
| - " with open(path, \"rt\") as f:\n", |
1041 |
| - " return f.read()\n", |
| 1090 | + " with open(path, \"rt\", encoding=\"utf-8\") as fh:\n", |
| 1091 | + " return fh.read()\n", |
1042 | 1092 | "\n",
|
1043 | 1093 | "\n",
|
1044 | 1094 | "def write_file(path: str, content: str) -> None:\n",
|
1045 |
| - " if \"/\" in path:\n", |
1046 |
| - " parent = \"/\".join(path.split(\"/\")[:-1])\n", |
1047 |
| - " os.makedirs(parent, exist_ok=True)\n", |
1048 |
| - " with open(path, \"wt\") as f:\n", |
1049 |
| - " f.write(content)\n", |
| 1095 | + " target = pathlib.Path(path)\n", |
| 1096 | + " target.parent.mkdir(parents=True, exist_ok=True)\n", |
| 1097 | + " with target.open(\"wt\", encoding=\"utf-8\") as fh:\n", |
| 1098 | + " fh.write(content)\n", |
1050 | 1099 | "\n",
|
1051 | 1100 | "\n",
|
1052 | 1101 | "def remove_file(path: str) -> None:\n",
|
1053 |
| - " os.remove(path)\n", |
| 1102 | + " pathlib.Path(path).unlink(missing_ok=True)\n", |
1054 | 1103 | "\n",
|
1055 | 1104 | "\n",
|
| 1105 | + "# --------------------------------------------------------------------------- #\n", |
| 1106 | + "# CLI entry-point\n", |
| 1107 | + "# --------------------------------------------------------------------------- #\n", |
1056 | 1108 | "def main() -> None:\n",
|
1057 | 1109 | " import sys\n",
|
1058 | 1110 | "\n",
|
1059 | 1111 | " patch_text = sys.stdin.read()\n",
|
1060 | 1112 | " if not patch_text:\n",
|
1061 |
| - " print(\"Please pass patch text through stdin\")\n", |
| 1113 | + " print(\"Please pass patch text through stdin\", file=sys.stderr)\n", |
1062 | 1114 | " return\n",
|
1063 | 1115 | " try:\n",
|
1064 | 1116 | " result = process_patch(patch_text, open_file, write_file, remove_file)\n",
|
1065 |
| - " except DiffError as e:\n", |
1066 |
| - " print(str(e))\n", |
| 1117 | + " except DiffError as exc:\n", |
| 1118 | + " print(exc, file=sys.stderr)\n", |
1067 | 1119 | " return\n",
|
1068 | 1120 | " print(result)\n",
|
1069 | 1121 | "\n",
|
|
0 commit comments