-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: kerthcet <kerthcet@gmail.com>
- Loading branch information
Showing
32 changed files
with
18,312 additions
and
295 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
## Location to install dependencies to
LOCALBIN ?= $(shell pwd)/bin
$(LOCALBIN):
	mkdir -p $(LOCALBIN)

## Path of the helmify binary; override HELMIFY to reuse an existing installation.
HELMIFY ?= $(LOCALBIN)/helmify
## Version suffix handed to `go install`. Defaults to `latest` (the previous
## hard-coded value); set it to a release tag for reproducible chart generation.
HELMIFY_VERSION ?= latest

.PHONY: helmify
helmify: $(HELMIFY) ## Download helmify locally if necessary.
# $(LOCALBIN) is an order-only prerequisite (after `|`): the directory must
# exist before installing, but its mtime changes whenever any other tool is
# dropped into it, and that must not mark helmify as out of date.
$(HELMIFY): | $(LOCALBIN)
	test -s $@ || GOBIN=$(LOCALBIN) go install github.com/arttor/helmify/cmd/helmify@$(HELMIFY_VERSION)

# Generate the Helm chart from the kustomize output.
# `manifests`, `kustomize`, $(KUBECTL) and $(KUSTOMIZE) are not defined in this
# fragment; they are expected to come from the Makefile that includes it.
# NOTE(review): the first recipe line applies the llmaz-system namespace to the
# cluster of the current kubectl context, so this target is not offline-only.
.PHONY: helm
helm: manifests kustomize helmify
	$(KUBECTL) create namespace llmaz-system --dry-run=client -o yaml | $(KUBECTL) apply -f -
	$(KUSTOMIZE) build config/default | $(HELMIFY) -crd-dir

# Regenerate the chart, then install or upgrade the `llmaz` release from ./chart
# into the llmaz-system namespace using ./chart/values.global.yaml.
.PHONY: helm-install
helm-install: helm
	helm upgrade --install llmaz ./chart --namespace llmaz-system -f ./chart/values.global.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,236 @@ | ||
# CustomResourceDefinition for the OpenModel kind in the llmaz.io API group.
# The controller-gen annotation below records v0.14.0 as the generator version.
apiVersion: apiextensions.k8s.io/v1 | ||
kind: CustomResourceDefinition | ||
metadata: | ||
annotations: | ||
controller-gen.kubebuilder.io/version: v0.14.0 | ||
name: openmodels.llmaz.io | ||
spec: | ||
# Version conversion uses the Webhook strategy: requests go to the
# llmaz-webhook-service Service in the llmaz-system namespace at path /convert,
# using ConversionReview version v1.
conversion: | ||
strategy: Webhook | ||
webhook: | ||
clientConfig: | ||
service: | ||
name: llmaz-webhook-service | ||
namespace: llmaz-system | ||
path: /convert | ||
conversionReviewVersions: | ||
- v1 | ||
group: llmaz.io | ||
# Resource naming: kind OpenModel, plural `openmodels`, short name `om`.
names: | ||
kind: OpenModel | ||
listKind: OpenModelList | ||
plural: openmodels | ||
shortNames: | ||
- om | ||
singular: openmodel | ||
# Cluster scope: OpenModel objects are not namespaced.
scope: Cluster | ||
versions: | ||
- name: v1alpha1 | ||
schema: | ||
openAPIV3Schema: | ||
description: OpenModel is the Schema for the open models API | ||
properties: | ||
apiVersion: | ||
description: |- | ||
APIVersion defines the versioned schema of this representation of an object. | ||
Servers should convert recognized schemas to the latest internal value, and | ||
may reject unrecognized values. | ||
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | ||
type: string | ||
kind: | ||
description: |- | ||
Kind is a string value representing the REST resource this object represents. | ||
Servers may infer this from the endpoint the client submits requests to. | ||
Cannot be updated. | ||
In CamelCase. | ||
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | ||
type: string | ||
metadata: | ||
type: object | ||
spec: | ||
description: ModelSpec defines the desired state of Model | ||
properties: | ||
familyName: | ||
description: |- | ||
FamilyName represents the model type, like llama2, which will be auto injected | ||
to the labels with the key of `llmaz.io/model-family-name`. | ||
type: string | ||
inferenceFlavors: | ||
description: |- | ||
InferenceFlavors represents the accelerator requirements to serve the model. | ||
Flavors are fungible following the priority represented by the slice order. | ||
items: | ||
description: |- | ||
Flavor defines the accelerator requirements for a model and the necessary parameters | ||
in autoscaling. Right now, it will be used in two places: | ||
- Pod scheduling with node selectors specified. | ||
- Cluster autoscaling with essential parameters provided. | ||
properties: | ||
name: | ||
description: Name represents the flavor name, which will be | ||
used in model claim. | ||
type: string | ||
nodeSelector: | ||
additionalProperties: | ||
type: string | ||
description: |- | ||
NodeSelector represents the node candidates for Pod placements, if a node doesn't | ||
meet the nodeSelector, it will be filtered out in the resourceFungibility scheduler plugin. | ||
If nodeSelector is empty, it means every node is a candidate. | ||
type: object | ||
params: | ||
additionalProperties: | ||
type: string | ||
description: |- | ||
Params stores other useful parameters and will be consumed by the autoscaling components | ||
like cluster-autoscaler, Karpenter. | ||
E.g. when scaling up nodes with 8x Nvidia A100, the parameter can be injected with | ||
instance-type: p4d.24xlarge for AWS. | ||
type: object | ||
requests: | ||
additionalProperties: | ||
anyOf: | ||
- type: integer | ||
- type: string | ||
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ | ||
x-kubernetes-int-or-string: true | ||
description: |- | ||
Requests defines the required accelerators to serve the model, like nvidia.com/gpu: 8. | ||
When GPU number is greater than 8, like 32, then multi-host inference is enabled and | ||
32/8=4 hosts will be grouped as a unit, each host will have a resource request as | ||
nvidia.com/gpu: 8. This may change in the future if the GPU number limit is broken. | ||
Not recommended to set the cpu and memory usage here. | ||
If using playground, you can define the cpu/mem usage at backendConfig. | ||
If using service, you can define the cpu/mem at the container resources. | ||
Note: if you define the same accelerator requests at playground/service as well, | ||
the requests here will be covered. | ||
type: object | ||
required: | ||
- name | ||
type: object | ||
maxItems: 8 | ||
type: array | ||
source: | ||
description: |- | ||
Source represents the source of the model, there're several ways to load | ||
the model such as loading from huggingface, OCI registry, s3, host path and so on. | ||
properties: | ||
modelHub: | ||
description: ModelHub represents the model registry for model | ||
downloads. | ||
properties: | ||
filename: | ||
description: |- | ||
Filename refers to a specified model file rather than the whole repo. | ||
This is helpful to download a specified GGUF model rather than downloading | ||
the whole repo which includes all kinds of quantized models. | ||
TODO: this is only supported with Huggingface, add support for ModelScope | ||
in the near future. | ||
type: string | ||
modelID: | ||
description: |- | ||
ModelID refers to the model identifier on model hub, | ||
such as meta-llama/Meta-Llama-3-8B. | ||
type: string | ||
name: | ||
default: Huggingface | ||
description: Name refers to the model registry, such as huggingface. | ||
enum: | ||
- Huggingface | ||
- ModelScope | ||
type: string | ||
revision: | ||
description: |- | ||
Revision refers to a Git revision id which can be a branch name, a tag, or a commit hash. | ||
Most of the time, you don't need to specify it. | ||
type: string | ||
type: object | ||
uri: | ||
description: |- | ||
URI represents various kinds of model sources following the URI protocol, e.g.: | ||
- OSS: oss://<bucket>.<endpoint>/<path-to-your-model> | ||
type: string | ||
type: object | ||
required: | ||
- familyName | ||
- source | ||
type: object | ||
status: | ||
description: ModelStatus defines the observed state of Model | ||
properties: | ||
conditions: | ||
description: Conditions represents the Inference condition. | ||
items: | ||
description: "Condition contains details for one aspect of the current | ||
state of this API Resource.\n---\nThis struct is intended for | ||
direct use as an array at the field path .status.conditions. For | ||
example,\n\n\n\ttype FooStatus struct{\n\t // Represents the | ||
observations of a foo's current state.\n\t // Known .status.conditions.type | ||
are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // | ||
+patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t | ||
\ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" | ||
patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t | ||
\ // other fields\n\t}" | ||
properties: | ||
lastTransitionTime: | ||
description: |- | ||
lastTransitionTime is the last time the condition transitioned from one status to another. | ||
This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. | ||
format: date-time | ||
type: string | ||
message: | ||
description: |- | ||
message is a human readable message indicating details about the transition. | ||
This may be an empty string. | ||
maxLength: 32768 | ||
type: string | ||
observedGeneration: | ||
description: |- | ||
observedGeneration represents the .metadata.generation that the condition was set based upon. | ||
For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date | ||
with respect to the current state of the instance. | ||
format: int64 | ||
minimum: 0 | ||
type: integer | ||
reason: | ||
description: |- | ||
reason contains a programmatic identifier indicating the reason for the condition's last transition. | ||
Producers of specific condition types may define expected values and meanings for this field, | ||
and whether the values are considered a guaranteed API. | ||
The value should be a CamelCase string. | ||
This field may not be empty. | ||
maxLength: 1024 | ||
minLength: 1 | ||
pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ | ||
type: string | ||
status: | ||
description: status of the condition, one of True, False, Unknown. | ||
enum: | ||
- "True" | ||
- "False" | ||
- Unknown | ||
type: string | ||
type: | ||
description: |- | ||
type of condition in CamelCase or in foo.example.com/CamelCase. | ||
--- | ||
Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be | ||
useful (see .node.status.conditions), the ability to deconflict is important. | ||
The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) | ||
maxLength: 316 | ||
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ | ||
type: string | ||
required: | ||
- lastTransitionTime | ||
- message | ||
- reason | ||
- status | ||
- type | ||
type: object | ||
type: array | ||
type: object | ||
type: object | ||
served: true | ||
storage: true | ||
subresources: | ||
status: {} |
Oops, something went wrong.