Skip to content

Commit 9f6939a

Browse files
authored Apr 14, 2025
4.1 prompting guide updates: update apply-patch code (#1772)
1 parent 6a47d53 commit 9f6939a

File tree

2 files changed

+259
-202
lines changed

2 files changed

+259
-202
lines changed
 

‎authors.yaml

+6 -1
Original file line number | Diff line number | Diff line change
@@ -281,4 +281,9 @@ vishnu-oai:
281281
nm-openai:
282282
name: "Noah MacCallum"
283283
website: "https://x.com/noahmacca"
284-
avatar: "https://avatars.githubusercontent.com/u/171723556"
284+
avatar: "https://avatars.githubusercontent.com/u/171723556"
285+
286+
julian-openai:
287+
name: "Julian Lee"
288+
website: "https://github.com/julian-openai"
289+
avatar: "https://avatars.githubusercontent.com/u/199828632"

‎examples/gpt4-1_prompting_guide.ipynb

+253 -201
Original file line number | Diff line number | Diff line change
@@ -591,12 +591,30 @@
591591
"outputs": [],
592592
"source": [
593593
"#!/usr/bin/env python3\n",
594-
"import os\n",
594+
"\n",
595+
"\"\"\"\n",
596+
"A self-contained **pure-Python 3.9+** utility for applying human-readable\n",
597+
"“pseudo-diff” patch files to a collection of text files.\n",
598+
"\"\"\"\n",
599+
"\n",
600+
"from __future__ import annotations\n",
601+
"\n",
602+
"import pathlib\n",
595603
"from dataclasses import dataclass, field\n",
596604
"from enum import Enum\n",
597-
"from typing import Callable, Dict, List, NoReturn, Optional, Tuple, Union\n",
605+
"from typing import (\n",
606+
" Callable,\n",
607+
" Dict,\n",
608+
" List,\n",
609+
" Optional,\n",
610+
" Tuple,\n",
611+
" Union,\n",
612+
")\n",
598613
"\n",
599614
"\n",
615+
"# --------------------------------------------------------------------------- #\n",
616+
"# Domain objects\n",
617+
"# --------------------------------------------------------------------------- #\n",
600618
"class ActionType(str, Enum):\n",
601619
" ADD = \"add\"\n",
602620
" DELETE = \"delete\"\n",
@@ -616,39 +634,19 @@
616634
" changes: Dict[str, FileChange] = field(default_factory=dict)\n",
617635
"\n",
618636
"\n",
619-
"def assemble_changes(\n",
620-
" orig: Dict[str, Optional[str]],\n",
621-
" updated_files: Dict[str, Optional[str]],\n",
622-
") -> Commit:\n",
623-
" commit = Commit()\n",
624-
" for path, new_content in updated_files.items():\n",
625-
" old_content = orig.get(path)\n",
626-
" if old_content == new_content:\n",
627-
" continue\n",
628-
" if old_content is not None and new_content is not None:\n",
629-
" commit.changes[path] = FileChange(\n",
630-
" type=ActionType.UPDATE,\n",
631-
" old_content=old_content,\n",
632-
" new_content=new_content,\n",
633-
" )\n",
634-
" elif new_content is not None:\n",
635-
" commit.changes[path] = FileChange(\n",
636-
" type=ActionType.ADD,\n",
637-
" new_content=new_content,\n",
638-
" )\n",
639-
" elif old_content is not None:\n",
640-
" commit.changes[path] = FileChange(\n",
641-
" type=ActionType.DELETE,\n",
642-
" old_content=old_content,\n",
643-
" )\n",
644-
" else:\n",
645-
" assert False\n",
646-
" return commit\n",
637+
"# --------------------------------------------------------------------------- #\n",
638+
"# Exceptions\n",
639+
"# --------------------------------------------------------------------------- #\n",
640+
"class DiffError(ValueError):\n",
641+
" \"\"\"Any problem detected while parsing or applying a patch.\"\"\"\n",
647642
"\n",
648643
"\n",
644+
"# --------------------------------------------------------------------------- #\n",
645+
"# Helper dataclasses used while parsing patches\n",
646+
"# --------------------------------------------------------------------------- #\n",
649647
"@dataclass\n",
650648
"class Chunk:\n",
651-
" orig_index: int = -1 # line index of the first line in the original file\n",
649+
" orig_index: int = -1\n",
652650
" del_lines: List[str] = field(default_factory=list)\n",
653651
" ins_lines: List[str] = field(default_factory=list)\n",
654652
"\n",
@@ -666,79 +664,108 @@
666664
" actions: Dict[str, PatchAction] = field(default_factory=dict)\n",
667665
"\n",
668666
"\n",
667+
"# --------------------------------------------------------------------------- #\n",
668+
"# Patch text parser\n",
669+
"# --------------------------------------------------------------------------- #\n",
669670
"@dataclass\n",
670671
"class Parser:\n",
671-
" current_files: Dict[str, str] = field(default_factory=dict)\n",
672-
" lines: List[str] = field(default_factory=list)\n",
672+
" current_files: Dict[str, str]\n",
673+
" lines: List[str]\n",
673674
" index: int = 0\n",
674675
" patch: Patch = field(default_factory=Patch)\n",
675676
" fuzz: int = 0\n",
676677
"\n",
678+
" # ------------- low-level helpers -------------------------------------- #\n",
679+
" def _cur_line(self) -> str:\n",
680+
" if self.index >= len(self.lines):\n",
681+
" raise DiffError(\"Unexpected end of input while parsing patch\")\n",
682+
" return self.lines[self.index]\n",
683+
"\n",
684+
" @staticmethod\n",
685+
" def _norm(line: str) -> str:\n",
686+
" \"\"\"Strip CR so comparisons work for both LF and CRLF input.\"\"\"\n",
687+
" return line.rstrip(\"\\r\")\n",
688+
"\n",
689+
" # ------------- scanning convenience ----------------------------------- #\n",
677690
" def is_done(self, prefixes: Optional[Tuple[str, ...]] = None) -> bool:\n",
678691
" if self.index >= len(self.lines):\n",
679692
" return True\n",
680-
" if prefixes and self.lines[self.index].startswith(prefixes):\n",
693+
" if (\n",
694+
" prefixes\n",
695+
" and len(prefixes) > 0\n",
696+
" and self._norm(self._cur_line()).startswith(prefixes)\n",
697+
" ):\n",
681698
" return True\n",
682699
" return False\n",
683700
"\n",
684701
" def startswith(self, prefix: Union[str, Tuple[str, ...]]) -> bool:\n",
685-
" assert self.index < len(self.lines), f\"Index: {self.index} >= {len(self.lines)}\"\n",
686-
" if self.lines[self.index].startswith(prefix):\n",
687-
" return True\n",
688-
" return False\n",
689-
"\n",
690-
" def read_str(self, prefix: str = \"\", return_everything: bool = False) -> str:\n",
691-
" assert self.index < len(self.lines), f\"Index: {self.index} >= {len(self.lines)}\"\n",
692-
" if self.lines[self.index].startswith(prefix):\n",
693-
" if return_everything:\n",
694-
" text = self.lines[self.index]\n",
695-
" else:\n",
696-
" text = self.lines[self.index][len(prefix) :]\n",
702+
" return self._norm(self._cur_line()).startswith(prefix)\n",
703+
"\n",
704+
" def read_str(self, prefix: str) -> str:\n",
705+
" \"\"\"\n",
706+
" Consume the current line if it starts with *prefix* and return the text\n",
707+
" **after** the prefix. Raises if prefix is empty.\n",
708+
" \"\"\"\n",
709+
" if prefix == \"\":\n",
710+
" raise ValueError(\"read_str() requires a non-empty prefix\")\n",
711+
" if self._norm(self._cur_line()).startswith(prefix):\n",
712+
" text = self._cur_line()[len(prefix) :]\n",
697713
" self.index += 1\n",
698714
" return text\n",
699715
" return \"\"\n",
700716
"\n",
701-
" def parse(self) -> NoReturn:\n",
717+
" def read_line(self) -> str:\n",
718+
" \"\"\"Return the current raw line and advance.\"\"\"\n",
719+
" line = self._cur_line()\n",
720+
" self.index += 1\n",
721+
" return line\n",
722+
"\n",
723+
" # ------------- public entry point -------------------------------------- #\n",
724+
" def parse(self) -> None:\n",
702725
" while not self.is_done((\"*** End Patch\",)):\n",
726+
" # ---------- UPDATE ---------- #\n",
703727
" path = self.read_str(\"*** Update File: \")\n",
704728
" if path:\n",
705729
" if path in self.patch.actions:\n",
706-
" raise DiffError(f\"Update File Error: Duplicate Path: {path}\")\n",
730+
" raise DiffError(f\"Duplicate update for file: {path}\")\n",
707731
" move_to = self.read_str(\"*** Move to: \")\n",
708732
" if path not in self.current_files:\n",
709-
" raise DiffError(f\"Update File Error: Missing File: {path}\")\n",
733+
" raise DiffError(f\"Update File Error - missing file: {path}\")\n",
710734
" text = self.current_files[path]\n",
711-
" action = self.parse_update_file(text)\n",
712-
" action.move_path = move_to\n",
735+
" action = self._parse_update_file(text)\n",
736+
" action.move_path = move_to or None\n",
713737
" self.patch.actions[path] = action\n",
714738
" continue\n",
739+
"\n",
740+
" # ---------- DELETE ---------- #\n",
715741
" path = self.read_str(\"*** Delete File: \")\n",
716742
" if path:\n",
717743
" if path in self.patch.actions:\n",
718-
" raise DiffError(f\"Delete File Error: Duplicate Path: {path}\")\n",
744+
" raise DiffError(f\"Duplicate delete for file: {path}\")\n",
719745
" if path not in self.current_files:\n",
720-
" raise DiffError(f\"Delete File Error: Missing File: {path}\")\n",
721-
" self.patch.actions[path] = PatchAction(\n",
722-
" type=ActionType.DELETE,\n",
723-
" )\n",
746+
" raise DiffError(f\"Delete File Error - missing file: {path}\")\n",
747+
" self.patch.actions[path] = PatchAction(type=ActionType.DELETE)\n",
724748
" continue\n",
749+
"\n",
750+
" # ---------- ADD ---------- #\n",
725751
" path = self.read_str(\"*** Add File: \")\n",
726752
" if path:\n",
727753
" if path in self.patch.actions:\n",
728-
" raise DiffError(f\"Add File Error: Duplicate Path: {path}\")\n",
754+
" raise DiffError(f\"Duplicate add for file: {path}\")\n",
729755
" if path in self.current_files:\n",
730-
" raise DiffError(f\"Add File Error: File already exists: {path}\")\n",
731-
" self.patch.actions[path] = self.parse_add_file()\n",
756+
" raise DiffError(f\"Add File Error - file already exists: {path}\")\n",
757+
" self.patch.actions[path] = self._parse_add_file()\n",
732758
" continue\n",
733-
" raise DiffError(f\"Unknown Line: {self.lines[self.index]}\")\n",
759+
"\n",
760+
" raise DiffError(f\"Unknown line while parsing: {self._cur_line()}\")\n",
761+
"\n",
734762
" if not self.startswith(\"*** End Patch\"):\n",
735-
" raise DiffError(\"Missing End Patch\")\n",
736-
" self.index += 1\n",
763+
" raise DiffError(\"Missing *** End Patch sentinel\")\n",
764+
" self.index += 1 # consume sentinel\n",
737765
"\n",
738-
" def parse_update_file(self, text: str) -> PatchAction:\n",
739-
" action = PatchAction(\n",
740-
" type=ActionType.UPDATE,\n",
741-
" )\n",
766+
" # ------------- section parsers ---------------------------------------- #\n",
767+
" def _parse_update_file(self, text: str) -> PatchAction:\n",
768+
" action = PatchAction(type=ActionType.UPDATE)\n",
742769
" lines = text.split(\"\\n\")\n",
743770
" index = 0\n",
744771
" while not self.is_done(\n",
@@ -752,100 +779,104 @@
752779
" ):\n",
753780
" def_str = self.read_str(\"@@ \")\n",
754781
" section_str = \"\"\n",
755-
" if not def_str:\n",
756-
" if self.lines[self.index] == \"@@\":\n",
757-
" section_str = self.lines[self.index]\n",
758-
" self.index += 1\n",
782+
" if not def_str and self._norm(self._cur_line()) == \"@@\":\n",
783+
" section_str = self.read_line()\n",
784+
"\n",
759785
" if not (def_str or section_str or index == 0):\n",
760-
" raise DiffError(f\"Invalid Line:\\n{self.lines[self.index]}\")\n",
786+
" raise DiffError(f\"Invalid line in update section:\\n{self._cur_line()}\")\n",
787+
"\n",
761788
" if def_str.strip():\n",
762789
" found = False\n",
763-
" if not [s for s in lines[:index] if s == def_str]:\n",
764-
" # def str is a skip ahead operator\n",
790+
" if def_str not in lines[:index]:\n",
765791
" for i, s in enumerate(lines[index:], index):\n",
766792
" if s == def_str:\n",
767793
" index = i + 1\n",
768794
" found = True\n",
769795
" break\n",
770-
" if not found and not [s for s in lines[:index] if s.strip() == def_str.strip()]:\n",
771-
" # def str is a skip ahead operator\n",
796+
" if not found and def_str.strip() not in [\n",
797+
" s.strip() for s in lines[:index]\n",
798+
" ]:\n",
772799
" for i, s in enumerate(lines[index:], index):\n",
773800
" if s.strip() == def_str.strip():\n",
774801
" index = i + 1\n",
775802
" self.fuzz += 1\n",
776803
" found = True\n",
777804
" break\n",
778-
" next_chunk_context, chunks, end_patch_index, eof = peek_next_section(\n",
779-
" self.lines, self.index\n",
780-
" )\n",
781-
" next_chunk_text = \"\\n\".join(next_chunk_context)\n",
782-
" new_index, fuzz = find_context(lines, next_chunk_context, index, eof)\n",
805+
"\n",
806+
" next_ctx, chunks, end_idx, eof = peek_next_section(self.lines, self.index)\n",
807+
" new_index, fuzz = find_context(lines, next_ctx, index, eof)\n",
783808
" if new_index == -1:\n",
784-
" if eof:\n",
785-
" raise DiffError(f\"Invalid EOF Context {index}:\\n{next_chunk_text}\")\n",
786-
" else:\n",
787-
" raise DiffError(f\"Invalid Context {index}:\\n{next_chunk_text}\")\n",
809+
" ctx_txt = \"\\n\".join(next_ctx)\n",
810+
" raise DiffError(\n",
811+
" f\"Invalid {'EOF ' if eof else ''}context at {index}:\\n{ctx_txt}\"\n",
812+
" )\n",
788813
" self.fuzz += fuzz\n",
789814
" for ch in chunks:\n",
790815
" ch.orig_index += new_index\n",
791816
" action.chunks.append(ch)\n",
792-
" index = new_index + len(next_chunk_context)\n",
793-
" self.index = end_patch_index\n",
794-
" continue\n",
817+
" index = new_index + len(next_ctx)\n",
818+
" self.index = end_idx\n",
795819
" return action\n",
796820
"\n",
797-
" def parse_add_file(self) -> PatchAction:\n",
798-
" lines = []\n",
821+
" def _parse_add_file(self) -> PatchAction:\n",
822+
" lines: List[str] = []\n",
799823
" while not self.is_done(\n",
800824
" (\"*** End Patch\", \"*** Update File:\", \"*** Delete File:\", \"*** Add File:\")\n",
801825
" ):\n",
802-
" s = self.read_str()\n",
826+
" s = self.read_line()\n",
803827
" if not s.startswith(\"+\"):\n",
804-
" raise DiffError(f\"Invalid Add File Line: {s}\")\n",
805-
" s = s[1:]\n",
806-
" lines.append(s)\n",
807-
" return PatchAction(\n",
808-
" type=ActionType.ADD,\n",
809-
" new_file=\"\\n\".join(lines),\n",
810-
" )\n",
828+
" raise DiffError(f\"Invalid Add File line (missing '+'): {s}\")\n",
829+
" lines.append(s[1:]) # strip leading '+'\n",
830+
" return PatchAction(type=ActionType.ADD, new_file=\"\\n\".join(lines))\n",
811831
"\n",
812832
"\n",
813-
"def find_context_core(lines: List[str], context: List[str], start: int) -> Tuple[int, int]:\n",
833+
"# --------------------------------------------------------------------------- #\n",
834+
"# Helper functions\n",
835+
"# --------------------------------------------------------------------------- #\n",
836+
"def find_context_core(\n",
837+
" lines: List[str], context: List[str], start: int\n",
838+
") -> Tuple[int, int]:\n",
814839
" if not context:\n",
815840
" return start, 0\n",
816841
"\n",
817-
" # Prefer identical\n",
818842
" for i in range(start, len(lines)):\n",
819843
" if lines[i : i + len(context)] == context:\n",
820844
" return i, 0\n",
821-
" # RStrip is ok\n",
822845
" for i in range(start, len(lines)):\n",
823-
" if [s.rstrip() for s in lines[i : i + len(context)]] == [s.rstrip() for s in context]:\n",
846+
" if [s.rstrip() for s in lines[i : i + len(context)]] == [\n",
847+
" s.rstrip() for s in context\n",
848+
" ]:\n",
824849
" return i, 1\n",
825-
" # Fine, Strip is ok too.\n",
826850
" for i in range(start, len(lines)):\n",
827-
" if [s.strip() for s in lines[i : i + len(context)]] == [s.strip() for s in context]:\n",
851+
" if [s.strip() for s in lines[i : i + len(context)]] == [\n",
852+
" s.strip() for s in context\n",
853+
" ]:\n",
828854
" return i, 100\n",
829855
" return -1, 0\n",
830856
"\n",
831857
"\n",
832-
"def find_context(lines: List[str], context: List[str], start: int, eof: bool) -> Tuple[int, int]:\n",
858+
"def find_context(\n",
859+
" lines: List[str], context: List[str], start: int, eof: bool\n",
860+
") -> Tuple[int, int]:\n",
833861
" if eof:\n",
834862
" new_index, fuzz = find_context_core(lines, context, len(lines) - len(context))\n",
835863
" if new_index != -1:\n",
836864
" return new_index, fuzz\n",
837865
" new_index, fuzz = find_context_core(lines, context, start)\n",
838-
" return new_index, fuzz + 10000\n",
866+
" return new_index, fuzz + 10_000\n",
839867
" return find_context_core(lines, context, start)\n",
840868
"\n",
841869
"\n",
842-
"def peek_next_section(lines: List[str], index: int) -> Tuple[List[str], List[Chunk], int, bool]:\n",
870+
"def peek_next_section(\n",
871+
" lines: List[str], index: int\n",
872+
") -> Tuple[List[str], List[Chunk], int, bool]:\n",
843873
" old: List[str] = []\n",
844874
" del_lines: List[str] = []\n",
845875
" ins_lines: List[str] = []\n",
846876
" chunks: List[Chunk] = []\n",
847877
" mode = \"keep\"\n",
848878
" orig_index = index\n",
879+
"\n",
849880
" while index < len(lines):\n",
850881
" s = lines[index]\n",
851882
" if s.startswith(\n",
@@ -861,9 +892,10 @@
861892
" break\n",
862893
" if s == \"***\":\n",
863894
" break\n",
864-
" elif s.startswith(\"***\"):\n",
895+
" if s.startswith(\"***\"):\n",
865896
" raise DiffError(f\"Invalid Line: {s}\")\n",
866897
" index += 1\n",
898+
"\n",
867899
" last_mode = mode\n",
868900
" if s == \"\":\n",
869901
" s = \" \"\n",
@@ -876,6 +908,7 @@
876908
" else:\n",
877909
" raise DiffError(f\"Invalid Line: {s}\")\n",
878910
" s = s[1:]\n",
911+
"\n",
879912
" if mode == \"keep\" and last_mode != mode:\n",
880913
" if ins_lines or del_lines:\n",
881914
" chunks.append(\n",
@@ -885,15 +918,16 @@
885918
" ins_lines=ins_lines,\n",
886919
" )\n",
887920
" )\n",
888-
" del_lines = []\n",
889-
" ins_lines = []\n",
921+
" del_lines, ins_lines = [], []\n",
922+
"\n",
890923
" if mode == \"delete\":\n",
891924
" del_lines.append(s)\n",
892925
" old.append(s)\n",
893926
" elif mode == \"add\":\n",
894927
" ins_lines.append(s)\n",
895928
" elif mode == \"keep\":\n",
896929
" old.append(s)\n",
930+
"\n",
897931
" if ins_lines or del_lines:\n",
898932
" chunks.append(\n",
899933
" Chunk(\n",
@@ -902,96 +936,61 @@
902936
" ins_lines=ins_lines,\n",
903937
" )\n",
904938
" )\n",
905-
" del_lines = []\n",
906-
" ins_lines = []\n",
939+
"\n",
907940
" if index < len(lines) and lines[index] == \"*** End of File\":\n",
908941
" index += 1\n",
909942
" return old, chunks, index, True\n",
943+
"\n",
910944
" if index == orig_index:\n",
911-
" raise DiffError(f\"Nothing in this section - {index=} {lines[index]}\")\n",
945+
" raise DiffError(\"Nothing in this section\")\n",
912946
" return old, chunks, index, False\n",
913947
"\n",
914948
"\n",
915-
"def text_to_patch(text: str, orig: Dict[str, str]) -> Tuple[Patch, int]:\n",
916-
" lines = text.strip().split(\"\\n\")\n",
917-
" if len(lines) < 2 or not lines[0].startswith(\"*** Begin Patch\") or lines[-1] != \"*** End Patch\":\n",
918-
" raise DiffError(\"Invalid patch text\")\n",
919-
"\n",
920-
" parser = Parser(\n",
921-
" current_files=orig,\n",
922-
" lines=lines,\n",
923-
" index=1,\n",
924-
" )\n",
925-
" parser.parse()\n",
926-
" return parser.patch, parser.fuzz\n",
927-
"\n",
928-
"\n",
929-
"def identify_files_needed(text: str) -> List[str]:\n",
930-
" lines = text.strip().split(\"\\n\")\n",
931-
" result = set()\n",
932-
" for line in lines:\n",
933-
" if line.startswith(\"*** Update File: \"):\n",
934-
" result.add(line[len(\"*** Update File: \") :])\n",
935-
" if line.startswith(\"*** Delete File: \"):\n",
936-
" result.add(line[len(\"*** Delete File: \") :])\n",
937-
" return list(result)\n",
938-
"\n",
939-
"\n",
940-
"def identify_files_added(text: str) -> List[str]:\n",
941-
" lines = text.strip().split(\"\\n\")\n",
942-
" result = set()\n",
943-
" for line in lines:\n",
944-
" if line.startswith(\"*** Add File: \"):\n",
945-
" result.add(line[len(\"*** Add File: \") :])\n",
946-
" return list(result)\n",
947-
"\n",
948-
"\n",
949+
"# --------------------------------------------------------------------------- #\n",
950+
"# Patch → Commit and Commit application\n",
951+
"# --------------------------------------------------------------------------- #\n",
949952
"def _get_updated_file(text: str, action: PatchAction, path: str) -> str:\n",
950-
" assert action.type == ActionType.UPDATE\n",
953+
" if action.type is not ActionType.UPDATE:\n",
954+
" raise DiffError(\"_get_updated_file called with non-update action\")\n",
951955
" orig_lines = text.split(\"\\n\")\n",
952-
" dest_lines = []\n",
956+
" dest_lines: List[str] = []\n",
953957
" orig_index = 0\n",
954-
" dest_index = 0\n",
958+
"\n",
955959
" for chunk in action.chunks:\n",
956-
" # Process the unchanged lines before the chunk\n",
957960
" if chunk.orig_index > len(orig_lines):\n",
958961
" raise DiffError(\n",
959-
" f\"_get_updated_file: {path}: chunk.orig_index {chunk.orig_index} > len(lines) {len(orig_lines)}\"\n",
962+
" f\"{path}: chunk.orig_index {chunk.orig_index} exceeds file length\"\n",
960963
" )\n",
961964
" if orig_index > chunk.orig_index:\n",
962965
" raise DiffError(\n",
963-
" f\"_get_updated_file: {path}: orig_index {orig_index} > chunk.orig_index {chunk.orig_index}\"\n",
966+
" f\"{path}: overlapping chunks at {orig_index} > {chunk.orig_index}\"\n",
964967
" )\n",
965-
" assert orig_index <= chunk.orig_index\n",
968+
"\n",
966969
" dest_lines.extend(orig_lines[orig_index : chunk.orig_index])\n",
967-
" delta = chunk.orig_index - orig_index\n",
968-
" orig_index += delta\n",
969-
" dest_index += delta\n",
970-
" # Process the inserted lines\n",
971-
" if chunk.ins_lines:\n",
972-
" for i in range(len(chunk.ins_lines)):\n",
973-
" dest_lines.append(chunk.ins_lines[i])\n",
974-
" dest_index += len(chunk.ins_lines)\n",
970+
" orig_index = chunk.orig_index\n",
971+
"\n",
972+
" dest_lines.extend(chunk.ins_lines)\n",
975973
" orig_index += len(chunk.del_lines)\n",
976-
" # Final part\n",
974+
"\n",
977975
" dest_lines.extend(orig_lines[orig_index:])\n",
978-
" delta = len(orig_lines) - orig_index\n",
979-
" orig_index += delta\n",
980-
" dest_index += delta\n",
981-
" assert orig_index == len(orig_lines)\n",
982-
" assert dest_index == len(dest_lines)\n",
983976
" return \"\\n\".join(dest_lines)\n",
984977
"\n",
985978
"\n",
986979
"def patch_to_commit(patch: Patch, orig: Dict[str, str]) -> Commit:\n",
987980
" commit = Commit()\n",
988981
" for path, action in patch.actions.items():\n",
989-
" if action.type == ActionType.DELETE:\n",
990-
" commit.changes[path] = FileChange(type=ActionType.DELETE, old_content=orig[path])\n",
991-
" elif action.type == ActionType.ADD:\n",
992-
" commit.changes[path] = FileChange(type=ActionType.ADD, new_content=action.new_file)\n",
993-
" elif action.type == ActionType.UPDATE:\n",
994-
" new_content = _get_updated_file(text=orig[path], action=action, path=path)\n",
982+
" if action.type is ActionType.DELETE:\n",
983+
" commit.changes[path] = FileChange(\n",
984+
" type=ActionType.DELETE, old_content=orig[path]\n",
985+
" )\n",
986+
" elif action.type is ActionType.ADD:\n",
987+
" if action.new_file is None:\n",
988+
" raise DiffError(\"ADD action without file content\")\n",
989+
" commit.changes[path] = FileChange(\n",
990+
" type=ActionType.ADD, new_content=action.new_file\n",
991+
" )\n",
992+
" elif action.type is ActionType.UPDATE:\n",
993+
" new_content = _get_updated_file(orig[path], action, path)\n",
995994
" commit.changes[path] = FileChange(\n",
996995
" type=ActionType.UPDATE,\n",
997996
" old_content=orig[path],\n",
@@ -1001,69 +1000,122 @@
10011000
" return commit\n",
10021001
"\n",
10031002
"\n",
1004-
"class DiffError(ValueError):\n",
1005-
" pass\n",
1003+
"# --------------------------------------------------------------------------- #\n",
1004+
"# User-facing helpers\n",
1005+
"# --------------------------------------------------------------------------- #\n",
1006+
"def text_to_patch(text: str, orig: Dict[str, str]) -> Tuple[Patch, int]:\n",
1007+
" lines = text.splitlines() # preserves blank lines, no strip()\n",
1008+
" if (\n",
1009+
" len(lines) < 2\n",
1010+
" or not Parser._norm(lines[0]).startswith(\"*** Begin Patch\")\n",
1011+
" or Parser._norm(lines[-1]) != \"*** End Patch\"\n",
1012+
" ):\n",
1013+
" raise DiffError(\"Invalid patch text - missing sentinels\")\n",
1014+
"\n",
1015+
" parser = Parser(current_files=orig, lines=lines, index=1)\n",
1016+
" parser.parse()\n",
1017+
" return parser.patch, parser.fuzz\n",
10061018
"\n",
10071019
"\n",
1008-
"def load_files(paths: List[str], open_fn: Callable) -> Dict[str, str]:\n",
1009-
" orig = {}\n",
1010-
" for path in paths:\n",
1011-
" orig[path] = open_fn(path)\n",
1012-
" return orig\n",
1020+
"def identify_files_needed(text: str) -> List[str]:\n",
1021+
" lines = text.splitlines()\n",
1022+
" return [\n",
1023+
" line[len(\"*** Update File: \") :]\n",
1024+
" for line in lines\n",
1025+
" if line.startswith(\"*** Update File: \")\n",
1026+
" ] + [\n",
1027+
" line[len(\"*** Delete File: \") :]\n",
1028+
" for line in lines\n",
1029+
" if line.startswith(\"*** Delete File: \")\n",
1030+
" ]\n",
10131031
"\n",
10141032
"\n",
1015-
"def apply_commit(commit: Commit, write_fn: Callable, remove_fn: Callable) -> None:\n",
1033+
"def identify_files_added(text: str) -> List[str]:\n",
1034+
" lines = text.splitlines()\n",
1035+
" return [\n",
1036+
" line[len(\"*** Add File: \") :]\n",
1037+
" for line in lines\n",
1038+
" if line.startswith(\"*** Add File: \")\n",
1039+
" ]\n",
1040+
"\n",
1041+
"\n",
1042+
"# --------------------------------------------------------------------------- #\n",
1043+
"# File-system helpers\n",
1044+
"# --------------------------------------------------------------------------- #\n",
1045+
"def load_files(paths: List[str], open_fn: Callable[[str], str]) -> Dict[str, str]:\n",
1046+
" return {path: open_fn(path) for path in paths}\n",
1047+
"\n",
1048+
"\n",
1049+
"def apply_commit(\n",
1050+
" commit: Commit,\n",
1051+
" write_fn: Callable[[str, str], None],\n",
1052+
" remove_fn: Callable[[str], None],\n",
1053+
") -> None:\n",
10161054
" for path, change in commit.changes.items():\n",
1017-
" if change.type == ActionType.DELETE:\n",
1055+
" if change.type is ActionType.DELETE:\n",
10181056
" remove_fn(path)\n",
1019-
" elif change.type == ActionType.ADD:\n",
1057+
" elif change.type is ActionType.ADD:\n",
1058+
" if change.new_content is None:\n",
1059+
" raise DiffError(f\"ADD change for {path} has no content\")\n",
10201060
" write_fn(path, change.new_content)\n",
1021-
" elif change.type == ActionType.UPDATE:\n",
1061+
" elif change.type is ActionType.UPDATE:\n",
1062+
" if change.new_content is None:\n",
1063+
" raise DiffError(f\"UPDATE change for {path} has no new content\")\n",
1064+
" target = change.move_path or path\n",
1065+
" write_fn(target, change.new_content)\n",
10221066
" if change.move_path:\n",
1023-
" write_fn(change.move_path, change.new_content)\n",
10241067
" remove_fn(path)\n",
1025-
" else:\n",
1026-
" write_fn(path, change.new_content)\n",
10271068
"\n",
10281069
"\n",
1029-
"def process_patch(text: str, open_fn: Callable, write_fn: Callable, remove_fn: Callable) -> str:\n",
1030-
" assert text.startswith(\"*** Begin Patch\")\n",
1070+
"def process_patch(\n",
1071+
" text: str,\n",
1072+
" open_fn: Callable[[str], str],\n",
1073+
" write_fn: Callable[[str, str], None],\n",
1074+
" remove_fn: Callable[[str], None],\n",
1075+
") -> str:\n",
1076+
" if not text.startswith(\"*** Begin Patch\"):\n",
1077+
" raise DiffError(\"Patch text must start with *** Begin Patch\")\n",
10311078
" paths = identify_files_needed(text)\n",
10321079
" orig = load_files(paths, open_fn)\n",
1033-
" patch, fuzz = text_to_patch(text, orig)\n",
1080+
" patch, _fuzz = text_to_patch(text, orig)\n",
10341081
" commit = patch_to_commit(patch, orig)\n",
10351082
" apply_commit(commit, write_fn, remove_fn)\n",
10361083
" return \"Done!\"\n",
10371084
"\n",
10381085
"\n",
1086+
"# --------------------------------------------------------------------------- #\n",
1087+
"# Default FS helpers\n",
1088+
"# --------------------------------------------------------------------------- #\n",
10391089
"def open_file(path: str) -> str:\n",
1040-
" with open(path, \"rt\") as f:\n",
1041-
" return f.read()\n",
1090+
" with open(path, \"rt\", encoding=\"utf-8\") as fh:\n",
1091+
" return fh.read()\n",
10421092
"\n",
10431093
"\n",
10441094
"def write_file(path: str, content: str) -> None:\n",
1045-
" if \"/\" in path:\n",
1046-
" parent = \"/\".join(path.split(\"/\")[:-1])\n",
1047-
" os.makedirs(parent, exist_ok=True)\n",
1048-
" with open(path, \"wt\") as f:\n",
1049-
" f.write(content)\n",
1095+
" target = pathlib.Path(path)\n",
1096+
" target.parent.mkdir(parents=True, exist_ok=True)\n",
1097+
" with target.open(\"wt\", encoding=\"utf-8\") as fh:\n",
1098+
" fh.write(content)\n",
10501099
"\n",
10511100
"\n",
10521101
"def remove_file(path: str) -> None:\n",
1053-
" os.remove(path)\n",
1102+
" pathlib.Path(path).unlink(missing_ok=True)\n",
10541103
"\n",
10551104
"\n",
1105+
"# --------------------------------------------------------------------------- #\n",
1106+
"# CLI entry-point\n",
1107+
"# --------------------------------------------------------------------------- #\n",
10561108
"def main() -> None:\n",
10571109
" import sys\n",
10581110
"\n",
10591111
" patch_text = sys.stdin.read()\n",
10601112
" if not patch_text:\n",
1061-
" print(\"Please pass patch text through stdin\")\n",
1113+
" print(\"Please pass patch text through stdin\", file=sys.stderr)\n",
10621114
" return\n",
10631115
" try:\n",
10641116
" result = process_patch(patch_text, open_file, write_file, remove_file)\n",
1065-
" except DiffError as e:\n",
1066-
" print(str(e))\n",
1117+
" except DiffError as exc:\n",
1118+
" print(exc, file=sys.stderr)\n",
10671119
" return\n",
10681120
" print(result)\n",
10691121
"\n",

0 commit comments

Comments
 (0)
Please sign in to comment.