diff --git a/api/v1alpha1/ragengine_types.go b/api/v1alpha1/ragengine_types.go new file mode 100644 index 000000000..a5d35205e --- /dev/null +++ b/api/v1alpha1/ragengine_types.go @@ -0,0 +1,109 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type StorageSpec struct { + //TODO: add vendor specific APIs for accessing vector DB services here. +} + +type RemoteEmbeddingSpec struct { + // URL points to a publicly available embedding service, such as OpenAI. + URL string `json:"url"` + // AccessSecret is the name of the secret that contains the service access token. + // +optional + AccessSecret string `json:"accessSecret,omitempty"` +} + +type LocalEmbeddingSpec struct { + // Image is the name of the containerized embedding model image. + // +optional + Image string `json:"image,omitempty"` + // +optional + ImagePullSecret string `json:"imagePullSecret,omitempty"` + // ModelID is the ID of the embedding model hosted by huggingface, e.g., BAAI/bge-small-en-v1.5. + // When this field is specified, the RAG engine will download the embedding model + // from huggingface repository during startup. The embedding model will not persist in local storage. + // Note that if Image is specified, ModelID should not be specified and vice versa. + // +optional + ModelID string `json:"modelID,omitempty"` + // ModelAccessSecret is the name of the secret that contains the huggingface access token. + // +optional + ModelAccessSecret string `json:"modelAccessSecret,omitempty"` +} + +type EmbeddingSpec struct { + // Remote specifies how to generate embeddings for index data using a remote service. + // Note that either Remote or Local needs to be specified, not both. + // +optional + Remote *RemoteEmbeddingSpec `json:"remote,omitempty"` + // Local specifies how to generate embeddings for index data using a model run locally. 
+ // +optional + Local *LocalEmbeddingSpec `json:"local,omitempty"` +} + +type InferenceServiceSpec struct { + // URL points to a running inference service endpoint which accepts http(s) payload. + URL string `json:"url"` + // AccessSecret is the name of the secret that contains the service access token. + // +optional + AccessSecret string `json:"accessSecret,omitempty"` +} + +type RAGEngineSpec struct { + // Compute specifies the dedicated GPU resource used by an embedding model running locally if required. + // +optional + Compute *ResourceSpec `json:"compute,omitempty"` + // Storage specifies how to access the vector database used to save the embedding vectors. + // If this field is not specified, by default, an in-memory vector DB will be used. + // The data will not be persisted. + // +optional + Storage *StorageSpec `json:"storage,omitempty"` + // Embedding specifies whether the RAG engine generates embedding vectors using a remote service + // or using a embedding model running locally. + Embedding *EmbeddingSpec `json:"embedding"` + InferenceService *InferenceServiceSpec `json:"inferenceService"` + // QueryServiceName is the name of the service which exposes the endpoint for accepting user queries to the + // inference service. If not specified, a default service name will be created by the RAG engine. + // +optional + QueryServiceName string `json:"queryServiceName,omitempty"` + // IndexServiceName is the name of the service which exposes the endpoint for user to input the index data + // to generate embeddings. If not specified, a default service name will be created by the RAG engine. 
+ // +optional + IndexServiceName string `json:"indexServiceName,omitempty"` +} + +// RAGEngineStatus defines the observed state of RAGEngine +type RAGEngineStatus struct { + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +// RAGEngine is the Schema for the ragengine API +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:resource:path=ragengines,scope=Namespaced,categories=ragengine +// +kubebuilder:storageversion +type RAGEngine struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec *RAGEngineSpec `json:"spec,omitempty"` + + Status RAGEngineStatus `json:"status,omitempty"` +} + +// RAGEngineList contains a list of RAGEngine +// +kubebuilder:object:root=true +type RAGEngineList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []RAGEngine `json:"items"` +} + +func init() { + SchemeBuilder.Register(&RAGEngine{}, &RAGEngineList{}) +} diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 4a0517171..ef55fed6a 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -104,6 +104,31 @@ func (in *DataSource) DeepCopy() *DataSource { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EmbeddingSpec) DeepCopyInto(out *EmbeddingSpec) { + *out = *in + if in.Remote != nil { + in, out := &in.Remote, &out.Remote + *out = new(RemoteEmbeddingSpec) + **out = **in + } + if in.Local != nil { + in, out := &in.Local, &out.Local + *out = new(LocalEmbeddingSpec) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddingSpec. 
+func (in *EmbeddingSpec) DeepCopy() *EmbeddingSpec { + if in == nil { + return nil + } + out := new(EmbeddingSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GPUConfig) DeepCopyInto(out *GPUConfig) { *out = *in @@ -124,6 +149,21 @@ func (in *GPUConfig) DeepCopy() *GPUConfig { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferenceServiceSpec) DeepCopyInto(out *InferenceServiceSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceServiceSpec. +func (in *InferenceServiceSpec) DeepCopy() *InferenceServiceSpec { + if in == nil { + return nil + } + out := new(InferenceServiceSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *InferenceSpec) DeepCopyInto(out *InferenceSpec) { *out = *in @@ -156,6 +196,21 @@ func (in *InferenceSpec) DeepCopy() *InferenceSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LocalEmbeddingSpec) DeepCopyInto(out *LocalEmbeddingSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LocalEmbeddingSpec. +func (in *LocalEmbeddingSpec) DeepCopy() *LocalEmbeddingSpec { + if in == nil { + return nil + } + out := new(LocalEmbeddingSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *PresetMeta) DeepCopyInto(out *PresetMeta) { *out = *in @@ -208,6 +263,141 @@ func (in *PresetSpec) DeepCopy() *PresetSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RAGEngine) DeepCopyInto(out *RAGEngine) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + if in.Spec != nil { + in, out := &in.Spec, &out.Spec + *out = new(RAGEngineSpec) + (*in).DeepCopyInto(*out) + } + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RAGEngine. +func (in *RAGEngine) DeepCopy() *RAGEngine { + if in == nil { + return nil + } + out := new(RAGEngine) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *RAGEngine) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RAGEngineList) DeepCopyInto(out *RAGEngineList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]RAGEngine, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RAGEngineList. +func (in *RAGEngineList) DeepCopy() *RAGEngineList { + if in == nil { + return nil + } + out := new(RAGEngineList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *RAGEngineList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RAGEngineSpec) DeepCopyInto(out *RAGEngineSpec) { + *out = *in + if in.Compute != nil { + in, out := &in.Compute, &out.Compute + *out = new(ResourceSpec) + (*in).DeepCopyInto(*out) + } + if in.Storage != nil { + in, out := &in.Storage, &out.Storage + *out = new(StorageSpec) + **out = **in + } + if in.Embedding != nil { + in, out := &in.Embedding, &out.Embedding + *out = new(EmbeddingSpec) + (*in).DeepCopyInto(*out) + } + if in.InferenceService != nil { + in, out := &in.InferenceService, &out.InferenceService + *out = new(InferenceServiceSpec) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RAGEngineSpec. +func (in *RAGEngineSpec) DeepCopy() *RAGEngineSpec { + if in == nil { + return nil + } + out := new(RAGEngineSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RAGEngineStatus) DeepCopyInto(out *RAGEngineStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RAGEngineStatus. +func (in *RAGEngineStatus) DeepCopy() *RAGEngineStatus { + if in == nil { + return nil + } + out := new(RAGEngineStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *RemoteEmbeddingSpec) DeepCopyInto(out *RemoteEmbeddingSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemoteEmbeddingSpec. +func (in *RemoteEmbeddingSpec) DeepCopy() *RemoteEmbeddingSpec { + if in == nil { + return nil + } + out := new(RemoteEmbeddingSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ResourceSpec) DeepCopyInto(out *ResourceSpec) { *out = *in @@ -238,6 +428,21 @@ func (in *ResourceSpec) DeepCopy() *ResourceSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *StorageSpec) DeepCopyInto(out *StorageSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StorageSpec. +func (in *StorageSpec) DeepCopy() *StorageSpec { + if in == nil { + return nil + } + out := new(StorageSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *TrainingConfig) DeepCopyInto(out *TrainingConfig) { *out = *in diff --git a/config/crd/bases/kaito.sh_ragengines.yaml b/config/crd/bases/kaito.sh_ragengines.yaml new file mode 100644 index 000000000..7b1ec3f55 --- /dev/null +++ b/config/crd/bases/kaito.sh_ragengines.yaml @@ -0,0 +1,269 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.15.0 + name: ragengines.kaito.sh +spec: + group: kaito.sh + names: + categories: + - ragengine + kind: RAGEngine + listKind: RAGEngineList + plural: ragengines + singular: ragengine + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: RAGEngine is the Schema for the ragengine API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + compute: + description: Compute specifies the dedicated GPU resource used by + an embedding model running locally if required. + properties: + count: + default: 1 + description: Count is the required number of GPU nodes. + type: integer + instanceType: + default: Standard_NC12s_v3 + description: |- + InstanceType specifies the GPU node SKU. + This field defaults to "Standard_NC12s_v3" if not specified. 
+ type: string + labelSelector: + description: LabelSelector specifies the required labels for the + GPU nodes. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + preferredNodes: + description: |- + PreferredNodes is an optional node list specified by the user. + If a node in the list does not have the required labels or + the required instanceType, it will be ignored. + items: + type: string + type: array + required: + - labelSelector + type: object + embedding: + description: |- + Embedding specifies whether the RAG engine generates embedding vectors using a remote service + or using a embedding model running locally. 
+ properties: + local: + description: Local specifies how to generate embeddings for index + data using a model run locally. + properties: + image: + description: Image is the name of the containerized embedding + model image. + type: string + imagePullSecret: + type: string + modelAccessSecret: + description: ModelAccessSecret is the name of the secret that + contains the huggingface access token. + type: string + modelID: + description: |- + ModelID is the ID of the embedding model hosted by huggingface, e.g., BAAI/bge-small-en-v1.5. + When this field is specified, the RAG engine will download the embedding model + from huggingface repository during startup. The embedding model will not persist in local storage. + Note that if Image is specified, ModelID should not be specified and vice versa. + type: string + type: object + remote: + description: |- + Remote specifies how to generate embeddings for index data using a remote service. + Note that either Remote or Local needs to be specified, not both. + properties: + accessSecret: + description: AccessSecret is the name of the secret that contains + the service access token. + type: string + url: + description: URL points to a publicly available embedding + service, such as OpenAI. + type: string + required: + - url + type: object + type: object + indexServiceName: + description: |- + IndexServiceName is the name of the service which exposes the endpoint for user to input the index data + to generate embeddings. If not specified, a default service name will be created by the RAG engine. + type: string + inferenceService: + properties: + accessSecret: + description: AccessSecret is the name of the secret that contains + the service access token. + type: string + url: + description: URL points to a running inference service endpoint + which accepts http(s) payload. 
+ type: string + required: + - url + type: object + queryServiceName: + description: |- + QueryServiceName is the name of the service which exposes the endpoint for accepting user queries to the + inference service. If not specified, a default service name will be created by the RAG engine. + type: string + storage: + description: |- + Storage specifies how to access the vector database used to save the embedding vectors. + If this field is not specified, by default, an in-memory vector DB will be used. + The data will not be persisted. + type: object + required: + - embedding + - inferenceService + type: object + status: + description: RAGEngineStatus defines the observed state of RAGEngine + properties: + conditions: + items: + description: "Condition contains details for one aspect of the current + state of this API Resource.\n---\nThis struct is intended for + direct use as an array at the field path .status.conditions. For + example,\n\n\n\ttype FooStatus struct{\n\t // Represents the + observations of a foo's current state.\n\t // Known .status.conditions.type + are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // + +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t + \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" + patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t + \ // other fields\n\t}" + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. 
+ maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. 
+ The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/presets/models/falcon/model.go b/presets/models/falcon/model.go index a94fa81f2..74c39995f 100644 --- a/presets/models/falcon/model.go +++ b/presets/models/falcon/model.go @@ -45,8 +45,8 @@ var ( } baseCommandPresetFalconInference = "accelerate launch" - baseCommandPresetFalconTuning = "python3 metrics_server.py & accelerate launch" - falconRunParams = map[string]string{ + baseCommandPresetFalconTuning = "python3 metrics_server.py & accelerate launch" + falconRunParams = map[string]string{ "torch_dtype": "bfloat16", "pipeline": "text-generation", } diff --git a/presets/models/mistral/model.go b/presets/models/mistral/model.go index ebab6fbe9..b4581d6f1 100644 --- a/presets/models/mistral/model.go +++ b/presets/models/mistral/model.go @@ -32,8 +32,8 @@ var ( } baseCommandPresetMistralInference = "accelerate launch" - baseCommandPresetMistralTuning = "python3 metrics_server.py & accelerate launch" - mistralRunParams = map[string]string{ + baseCommandPresetMistralTuning = "python3 metrics_server.py & accelerate launch" + mistralRunParams = map[string]string{ "torch_dtype": "bfloat16", "pipeline": "text-generation", } diff --git a/presets/models/phi2/model.go b/presets/models/phi2/model.go index 731043f11..07fb8e0d2 100644 --- a/presets/models/phi2/model.go +++ b/presets/models/phi2/model.go @@ -26,8 +26,8 @@ var ( } baseCommandPresetPhiInference = "accelerate launch" - baseCommandPresetPhiTuning = "python3 metrics_server.py & accelerate launch" - phiRunParams = map[string]string{ + baseCommandPresetPhiTuning = "python3 metrics_server.py & 
accelerate launch" + phiRunParams = map[string]string{ "torch_dtype": "float16", "pipeline": "text-generation", } diff --git a/presets/models/phi3/model.go b/presets/models/phi3/model.go index c645b99e5..5656fc15a 100644 --- a/presets/models/phi3/model.go +++ b/presets/models/phi3/model.go @@ -44,8 +44,8 @@ var ( } baseCommandPresetPhiInference = "accelerate launch" - baseCommandPresetPhiTuning = "python3 metrics_server.py & accelerate launch" - phiRunParams = map[string]string{ + baseCommandPresetPhiTuning = "python3 metrics_server.py & accelerate launch" + phiRunParams = map[string]string{ "torch_dtype": "auto", "pipeline": "text-generation", "trust_remote_code": "",