{"payload":{"pageCount":1,"repositories":[{"type":"Public","name":"cortex","owner":"janhq","isFork":false,"description":"Drop-in, local AI alternative to the OpenAI stack. Multi-engine (llama.cpp, TensorRT-LLM). Powers 👋 Jan","allTopics":["ai","cuda","llama","accelerated","inference-engine","openai-api","llm","stable-diffusion","llms","llamacpp","llama2","gguf","tensorrt-llm"],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":5,"issueCount":74,"starsCount":1688,"forksCount":85,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,39,46,18,20,5,10,25,32,128,41,29,37,30,15,39,30,53,44,77,21,23,20,12,2,0,2,6,22,28,6,2,0,6,36,44,48],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-04T15:23:12.802Z"}},{"type":"Public","name":"cortex.llamacpp","owner":"janhq","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":3,"issueCount":5,"starsCount":3,"forksCount":1,"license":"GNU Affero General Public License v3.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,6,5,8,14,11],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-04T10:30:56.532Z"}},{"type":"Public","name":"cortex.onnx","owner":"janhq","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":1,"issueCount":0,"starsCount":0,"forksCount":0,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-04T09:34:00.055Z"}},{"type":"Public","name":"cortex.tensorrt-llm","owner":"janhq","isFork":true,"description":"Nitro is an C++ inference server on top of TensorRT-LLM. OpenAI-compatible API. Run blazing fast inference on Nvidia GPUs. 
Used in Jan","allTopics":["jan","tensorrt","llm","tensorrt-llm"],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":1,"issueCount":8,"starsCount":28,"forksCount":750,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,5,0,6,8,2,0,6,2,1,4,1,1,0,1,3,2,1,1,6,1,1,4,2,35,23,1,2,0,1,1,1,2,2,2,2,1],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-03T12:43:54.277Z"}},{"type":"Public","name":"cortex.python","owner":"janhq","isFork":false,"description":"C++ code that run Python embedding","allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":0,"issueCount":0,"starsCount":3,"forksCount":0,"license":"GNU Affero General Public License v3.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,1,3,3,9,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-23T13:36:05.468Z"}},{"type":"Public","name":"infinity","owner":"janhq","isFork":true,"description":"The AI-native database built for LLM applications, providing incredibly fast vector and full-text search ","allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":152,"license":"Apache License 2.0","participation":[2,14,14,3,2,3,1,1,7,4,1,7,12,19,17,20,26,19,7,1,15,18,27,25,19,35,30,46,45,27,17,8,13,12,28,16,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-02-19T06:45:53.239Z"}},{"type":"Public","name":"llama.cpp-avx-vnni","owner":"janhq","isFork":true,"description":"Port of Facebook's LLaMA model in C/C++","allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":8482,"license":"MIT License","participation":[43,54,34,27,33,34,52,36,29,23,26,95,70,37,41,17,37,47,28,34,27,47,20,36,24,37,12,39,39,43,52,88,73,58,60,60,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-02-19T07:04:40.274Z"}},{"type":"Public","name":"TensorRT","owner":"janhq","isFork":true,"description":"NVIDIA® TensorRT™, an SDK for high-performance deep learning inference, includes a deep learning inference optimizer and runtime that delivers low latency and high throughput for inference applications.","allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":1990,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-09-03T03:08:17.003Z"}}],"repositoryCount":8,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"Repositories"}
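The cortex and cortex.tensorrt-llm entries above advertise an OpenAI-compatible API, meaning existing OpenAI client code can target a local server simply by swapping the base URL. Below is a minimal sketch using the official `openai` Python client; the port, endpoint path, and model id are illustrative assumptions, not details taken from this listing.

```python
# Sketch: calling a local OpenAI-compatible server with the official
# openai Python client. The base_url, port, and model name below are
# illustrative assumptions, not values from the repository listing above.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:1337/v1",  # hypothetical local endpoint
    api_key="not-needed-locally",         # local servers typically ignore the key
)

# Same call shape as against api.openai.com; only the base_url changed.
response = client.chat.completions.create(
    model="llama2-7b-gguf",  # hypothetical model id registered with the server
    messages=[{"role": "user", "content": "Hello from a local client"}],
)
print(response.choices[0].message.content)
```

Because only the base URL and model id differ from a hosted OpenAI setup, this is what "drop-in" means in practice: tooling built against the OpenAI SDK needs no code changes beyond configuration.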