feat: sync llama.cpp changes (#7)
* feat: sync llama.cpp

* fix: update patch

* docs(readme): remove note of ggml tensor allocator
jhen0409 authored Aug 17, 2023
1 parent e162586 commit 81257bf
Showing 4 changed files with 6 additions and 25 deletions.
1 change: 0 additions & 1 deletion README.md
@@ -78,7 +78,6 @@ jest.mock('llama.rn', () => require('llama.rn/jest/mock'))
- Metal:
  - In our testing, some devices are not able to use Metal (`params.n_gpu_layers > 0`) because llama.cpp uses SIMD-scoped operations; you can check whether your device is supported in the [Metal feature set tables](https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf). An Apple7 GPU is the minimum requirement (see the sketch after this list).
  - Metal is also not supported in the iOS simulator due to [this limitation](https://developer.apple.com/documentation/metal/developing_metal_apps_that_run_in_simulator#3241609), because we use more than 14 constant buffers.
  - We can use the ggml tensor allocator (see [llama.cpp#2411](https://github.com/ggerganov/llama.cpp/pull/2411)) by setting the `RNLLAMA_DISABLE_METAL=1` env variable on pod install, which reduces memory usage. This is very useful if you only want to use the CPU.
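
As a quick illustration of the `n_gpu_layers` toggle above, here is a minimal sketch. It assumes llama.rn's `initLlama` export and these option names; adjust to the actual API of the version you use:

```ts
import { initLlama } from 'llama.rn'

// Minimal sketch (assumed API): request GPU layers only when the device's GPU
// is known to support Metal (Apple7 family or newer); otherwise stay on CPU.
async function loadModel(modelPath: string, metalSupported: boolean) {
  const context = await initLlama({
    model: modelPath,
    n_ctx: 2048,
    // n_gpu_layers > 0 enables Metal; keep it at 0 on unsupported devices
    // (or when pods were installed with RNLLAMA_DISABLE_METAL=1).
    n_gpu_layers: metalSupported ? 1 : 0,
  })
  return context
}
```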

## Contributing

2 changes: 1 addition & 1 deletion example/ios/Podfile.lock
@@ -1223,7 +1223,7 @@ SPEC CHECKSUMS:
glog: 04b94705f318337d7ead9e6d17c019bd9b1f6b1b
hermes-engine: 10fbd3f62405c41ea07e71973ea61e1878d07322
libevent: 4049cae6c81cdb3654a443be001fb9bdceff7913
- llama-rn: 7773938cfbe2349f06e72c095b51de089acb281f
+ llama-rn: f8cb1160d9506a40743054510177149d24daf516
RCT-Folly: 424b8c9a7a0b9ab2886ffe9c3b041ef628fd4fb1
RCTRequired: a2faf4bad4e438ca37b2040cb8f7799baa065c18
RCTTypeSafety: cb09f3e4747b6d18331a15eb05271de7441ca0b3
2 changes: 1 addition & 1 deletion llama.cpp
26 changes: 4 additions & 22 deletions scripts/ggml-metal.m.patch
@@ -1,24 +1,6 @@
--- ggml-metal-orig.m 2023-08-16 09:13:47
+++ ggml-metal.m 2023-08-16 09:22:15
@@ -163,10 +163,15 @@

// load kernels
{
+ NSError * error = nil;
#define LM_GGML_METAL_ADD_KERNEL(name) \
ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \
- ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:nil]; \
- fprintf(stderr, "%s: loaded %-32s %16p\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name);
+ ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:&error]; \
+ fprintf(stderr, "%s: loaded %-32s %16p\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name); \
+ if (error) { \
+ fprintf(stderr, "%s: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
+ return NULL; \
+ }

LM_GGML_METAL_ADD_KERNEL(add);
LM_GGML_METAL_ADD_KERNEL(add_row);
@@ -205,13 +210,13 @@
--- ggml-metal-orig.m 2023-08-17 10:15:26
+++ ggml-metal.m 2023-08-17 10:14:18
@@ -218,13 +218,13 @@
#undef LM_GGML_METAL_ADD_KERNEL
}

@@ -38,7 +20,7 @@

return ctx;
}
@@ -337,15 +342,15 @@
@@ -351,15 +351,15 @@
}
}

