Add template files

Signed-off-by: kerthcet <kerthcet@gmail.com>
InftyAI · Sep 11, 2024 · c0395e7 · c0395e7
1 parent 458de44
commit c0395e7
Show file tree

Hide file tree

Showing 32 changed files with 18,312 additions and 295 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/Makefile b/Makefile
@@ -0,0 +1,35 @@
+## Location to install dependencies to.
+## $(CURDIR) is make's built-in absolute working directory, so no shell is forked.
+LOCALBIN ?= $(CURDIR)/bin
+# Creates the tool directory on demand.
+$(LOCALBIN):
+	mkdir -p $@
+
+## Tool binaries; each may be overridden from the command line or the environment.
+HELMIFY ?= $(LOCALBIN)/helmify
+KUBECTL ?= kubectl
+KUSTOMIZE ?= $(LOCALBIN)/kustomize
+
+## Tool versions. "latest" keeps the previous behaviour; pin a release tag
+## (e.g. make helmify HELMIFY_VERSION=<tag>) for reproducible chart generation.
+HELMIFY_VERSION ?= latest
+
+.PHONY: helmify
+helmify: $(HELMIFY) ## Download helmify locally if necessary.
+# $(LOCALBIN) is order-only (after the |): the directory must exist, but its
+# mtime, which changes whenever any tool is installed into it, must not make
+# the helmify binary look out of date.
+$(HELMIFY): | $(LOCALBIN)
+	test -s $(LOCALBIN)/helmify || GOBIN=$(LOCALBIN) go install github.com/arttor/helmify/cmd/helmify@$(HELMIFY_VERSION)
+
+# NOTE(review): the `manifests` and `kustomize` prerequisites have no rule in
+# this Makefile; presumably they are provided by another Makefile that this
+# fragment is used together with. Confirm before running `make helm` standalone.
+.PHONY: helm
+helm: manifests kustomize helmify ## Build config/default with kustomize and pipe the result into helmify.
+	$(KUBECTL) create namespace llmaz-system --dry-run=client -o yaml | $(KUBECTL) apply -f -
+	$(KUSTOMIZE) build config/default | $(HELMIFY) -crd-dir
+
+.PHONY: helm-install
+helm-install: helm ## Install or upgrade the llmaz release from ./chart into the llmaz-system namespace.
+	helm upgrade --install llmaz ./chart --namespace llmaz-system -f ./chart/values.global.yaml
diff --git a/README.md b/README.md
@@ -6,5 +6,7 @@
 </p>
 
 <h3 align="center">
-Helm chart for llmaz
+A Helm Chart for llmaz
 </h3>
+
+All the chart files are now hosted in [llmaz](https://github.com/InftyAI/llmaz) for easy integration. Please refer to [installation.md](https://github.com/InftyAI/llmaz/blob/main/docs/installation.md) for details.
diff --git a/llmaz/.DS_Store b/llmaz/.DS_Store
diff --git a/llmaz/Chart.yaml b/llmaz/Chart.yaml
@@ -1,7 +1,6 @@
 apiVersion: v2
 name: llmaz
-description: A Helm chart for Kubernetes
-
+description: A Helm chart for llmaz
 # A chart can be either an 'application' or a 'library' chart.
 #
 # Application charts are a collection of templates that can be packaged into versioned archives
@@ -11,14 +10,12 @@ description: A Helm chart for Kubernetes
 # a dependency of application charts to inject those utilities and functions into the rendering
 # pipeline. Library charts do not define any templates and therefore cannot be deployed.
 type: application
-
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.0
-
+version: 0.0.1
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
-appVersion: "1.16.0"
+appVersion: "0.0.6"
diff --git a/llmaz/crds/.DS_Store b/llmaz/crds/.DS_Store
diff --git a/llmaz/crds/backendruntime-crd.yaml b/llmaz/crds/backendruntime-crd.yaml
diff --git a/llmaz/crds/openmodel-crd.yaml b/llmaz/crds/openmodel-crd.yaml
@@ -0,0 +1,236 @@
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.14.0
+  name: openmodels.llmaz.io
+spec:
+  conversion:
+    strategy: Webhook
+    webhook:
+      clientConfig:
+        service:
+          name: llmaz-webhook-service
+          namespace: llmaz-system
+          path: /convert
+      conversionReviewVersions:
+      - v1
+  group: llmaz.io
+  names:
+    kind: OpenModel
+    listKind: OpenModelList
+    plural: openmodels
+    shortNames:
+    - om
+    singular: openmodel
+  scope: Cluster
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: OpenModel is the Schema for the open models API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: ModelSpec defines the desired state of Model
+            properties:
+              familyName:
+                description: |-
+                  FamilyName represents the model type, like llama2, which will be auto injected
+                  to the labels with the key of `llmaz.io/model-family-name`.
+                type: string
+              inferenceFlavors:
+                description: |-
+                  InferenceFlavors represents the accelerator requirements to serve the model.
+                  Flavors are fungible following the priority represented by the slice order.
+                items:
+                  description: |-
+                    Flavor defines the accelerator requirements for a model and the necessary parameters
+                    in autoscaling. Right now, it will be used in two places:
+                    - Pod scheduling with node selectors specified.
+                    - Cluster autoscaling with essential parameters provided.
+                  properties:
+                    name:
+                      description: Name represents the flavor name, which will be
+                        used in model claim.
+                      type: string
+                    nodeSelector:
+                      additionalProperties:
+                        type: string
+                      description: |-
+                        NodeSelector represents the node candidates for Pod placements, if a node doesn't
+                        meet the nodeSelector, it will be filtered out in the resourceFungibility scheduler plugin.
+                        If nodeSelector is empty, it means every node is a candidate.
+                      type: object
+                    params:
+                      additionalProperties:
+                        type: string
+                      description: |-
+                        Params stores other useful parameters and will be consumed by the autoscaling components
+                        like cluster-autoscaler, Karpenter.
+                        E.g. when scaling up nodes with 8x Nvidia A100, the parameter can be injected with
+                        instance-type: p4d.24xlarge for AWS.
+                      type: object
+                    requests:
+                      additionalProperties:
+                        anyOf:
+                        - type: integer
+                        - type: string
+                        pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                        x-kubernetes-int-or-string: true
+                      description: |-
+                        Requests defines the required accelerators to serve the model, like nvidia.com/gpu: 8.
+                        When GPU number is greater than 8, like 32, then multi-host inference is enabled and
+                        32/8=4 hosts will be grouped as a unit, each host will have a resource request as
+                        nvidia.com/gpu: 8. This may change in the future if the GPU number limit is broken.
+                        Not recommended to set the cpu and memory usage here.
+                        If using playground, you can define the cpu/mem usage at backendConfig.
+                        If using service, you can define the cpu/mem at the container resources.
+                        Note: if you define the same accelerator requests at playground/service as well,
+                        the requests here will be overridden.
+                      type: object
+                  required:
+                  - name
+                  type: object
+                maxItems: 8
+                type: array
+              source:
+                description: |-
+                  Source represents the source of the model, there're several ways to load
+                  the model such as loading from huggingface, OCI registry, s3, host path and so on.
+                properties:
+                  modelHub:
+                    description: ModelHub represents the model registry for model
+                      downloads.
+                    properties:
+                      filename:
+                        description: |-
+                          Filename refers to a specified model file rather than the whole repo.
+                          This is helpful to download a specified GGUF model rather than downloading
+                          the whole repo which includes all kinds of quantized models.
+                          TODO: this is only supported with Huggingface, add support for ModelScope
+                          in the near future.
+                        type: string
+                      modelID:
+                        description: |-
+                          ModelID refers to the model identifier on model hub,
+                          such as meta-llama/Meta-Llama-3-8B.
+                        type: string
+                      name:
+                        default: Huggingface
+                        description: Name refers to the model registry, such as huggingface.
+                        enum:
+                        - Huggingface
+                        - ModelScope
+                        type: string
+                      revision:
+                        description: |-
+                          Revision refers to a Git revision id which can be a branch name, a tag, or a commit hash.
+                          Most of the time, you don't need to specify it.
+                        type: string
+                    type: object
+                  uri:
+                    description: |-
+                      URI represents various kinds of model sources following the URI protocol, e.g.:
+                      - OSS: oss://<bucket>.<endpoint>/<path-to-your-model>
+                    type: string
+                type: object
+            required:
+            - familyName
+            - source
+            type: object
+          status:
+            description: ModelStatus defines the observed state of Model
+            properties:
+              conditions:
+                description: Conditions represents the Inference condition.
+                items:
+                  description: "Condition contains details for one aspect of the current
+                    state of this API Resource.\n---\nThis struct is intended for
+                    direct use as an array at the field path .status.conditions.  For
+                    example,\n\n\n\ttype FooStatus struct{\n\t    // Represents the
+                    observations of a foo's current state.\n\t    // Known .status.conditions.type
+                    are: \"Available\", \"Progressing\", and \"Degraded\"\n\t    //
+                    +patchMergeKey=type\n\t    // +patchStrategy=merge\n\t    // +listType=map\n\t
+                    \   // +listMapKey=type\n\t    Conditions []metav1.Condition `json:\"conditions,omitempty\"
+                    patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
+                    \   // other fields\n\t}"
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: |-
+                        type of condition in CamelCase or in foo.example.com/CamelCase.
+                        ---
+                        Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
+                        useful (see .node.status.conditions), the ability to deconflict is important.
+                        The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}