From 2cdc6826964f36744952f750b07471069cac681f Mon Sep 17 00:00:00 2001 From: Fei Guo Date: Wed, 18 Sep 2024 16:48:57 -0700 Subject: [PATCH] feat: Add RAGEngine CRD (#597) This PR adds the initial draft for the RAGEngine CRD in Kaito. A RAGEngine CRD defines all resources needed to run a RAG on top of a LLM inference service. Upon creating a RAGEngine CR, a new controller will create a deployment which runs a RAG engine instance. The instance provides http endpoints for both `index` and `query` services. The instance can optionally choose a public model embedding service or run a local embedding model with GPU to convert the input index data to vectors. The instance can also connect to a Vector DB instance to persist the vectors db or by default using an in-memory vector DB. The instance uses the `llamaIndex` library to orchestrate the workflow. When RAGEngine instance is up and running, users should send questions to the `query` endpoint of RAG instance instead of the normal `chat` endpoint in the inference service. The RAGEngine is intended to be "standalone". It can use any public inference service or inference services hosted by Kaito workspace. The RAG engine instance is designed to help retrieve prompts from unstructured data (arbitrary index data provided by the users). Retrieving from structured data or search engine is out of the scope for now. 
--- api/v1alpha1/ragengine_types.go | 109 +++++++++ api/v1alpha1/zz_generated.deepcopy.go | 205 +++++++++++++++++ config/crd/bases/kaito.sh_ragengines.yaml | 269 ++++++++++++++++++++++ presets/models/falcon/model.go | 4 +- presets/models/mistral/model.go | 4 +- presets/models/phi2/model.go | 4 +- presets/models/phi3/model.go | 4 +- 7 files changed, 591 insertions(+), 8 deletions(-) create mode 100644 api/v1alpha1/ragengine_types.go create mode 100644 config/crd/bases/kaito.sh_ragengines.yaml diff --git a/api/v1alpha1/ragengine_types.go b/api/v1alpha1/ragengine_types.go new file mode 100644 index 000000000..a5d35205e --- /dev/null +++ b/api/v1alpha1/ragengine_types.go @@ -0,0 +1,109 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type StorageSpec struct { + //TODO: add vendor specific APIs for accessing vector DB services here. +} + +type RemoteEmbeddingSpec struct { + // URL points to a publicly available embedding service, such as OpenAI. + URL string `json:"url"` + // AccessSecret is the name of the secret that contains the service access token. + // +optional + AccessSecret string `json:"accessSecret,omitempty"` +} + +type LocalEmbeddingSpec struct { + // Image is the name of the containerized embedding model image. + // +optional + Image string `json:"image,omitempty"` + // +optional + ImagePullSecret string `json:"imagePullSecret,omitempty"` + // ModelID is the ID of the embedding model hosted by huggingface, e.g., BAAI/bge-small-en-v1.5. + // When this field is specified, the RAG engine will download the embedding model + // from huggingface repository during startup. The embedding model will not persist in local storage. + // Note that if Image is specified, ModelID should not be specified and vice versa. 
+ // +optional + ModelID string `json:"modelID,omitempty"` + // ModelAccessSecret is the name of the secret that contains the huggingface access token. + // +optional + ModelAccessSecret string `json:"modelAccessSecret,omitempty"` +} + +type EmbeddingSpec struct { + // Remote specifies how to generate embeddings for index data using a remote service. + // Note that either Remote or Local needs to be specified, not both. + // +optional + Remote *RemoteEmbeddingSpec `json:"remote,omitempty"` + // Local specifies how to generate embeddings for index data using a model run locally. + // +optional + Local *LocalEmbeddingSpec `json:"local,omitempty"` +} + +type InferenceServiceSpec struct { + // URL points to a running inference service endpoint which accepts http(s) payload. + URL string `json:"url"` + // AccessSecret is the name of the secret that contains the service access token. + // +optional + AccessSecret string `json:"accessSecret,omitempty"` +} + +type RAGEngineSpec struct { + // Compute specifies the dedicated GPU resource used by an embedding model running locally if required. + // +optional + Compute *ResourceSpec `json:"compute,omitempty"` + // Storage specifies how to access the vector database used to save the embedding vectors. + // If this field is not specified, by default, an in-memory vector DB will be used. + // The data will not be persisted. + // +optional + Storage *StorageSpec `json:"storage,omitempty"` + // Embedding specifies whether the RAG engine generates embedding vectors using a remote service + // or using a embedding model running locally. + Embedding *EmbeddingSpec `json:"embedding"` + InferenceService *InferenceServiceSpec `json:"inferenceService"` + // QueryServiceName is the name of the service which exposes the endpoint for accepting user queries to the + // inference service. If not specified, a default service name will be created by the RAG engine. 
+ // +optional + QueryServiceName string `json:"queryServiceName,omitempty"` + // IndexServiceName is the name of the service which exposes the endpoint for user to input the index data + // to generate embeddings. If not specified, a default service name will be created by the RAG engine. + // +optional + IndexServiceName string `json:"indexServiceName,omitempty"` +} + +// RAGEngineStatus defines the observed state of RAGEngine +type RAGEngineStatus struct { + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +// RAGEngine is the Schema for the ragengine API +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:resource:path=ragengines,scope=Namespaced,categories=ragengine +// +kubebuilder:storageversion +type RAGEngine struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec *RAGEngineSpec `json:"spec,omitempty"` + + Status RAGEngineStatus `json:"status,omitempty"` +} + +// RAGEngineList contains a list of RAGEngine +// +kubebuilder:object:root=true +type RAGEngineList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []RAGEngine `json:"items"` +} + +func init() { + SchemeBuilder.Register(&RAGEngine{}, &RAGEngineList{}) +} diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 4a0517171..ef55fed6a 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -104,6 +104,31 @@ func (in *DataSource) DeepCopy() *DataSource { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *EmbeddingSpec) DeepCopyInto(out *EmbeddingSpec) { + *out = *in + if in.Remote != nil { + in, out := &in.Remote, &out.Remote + *out = new(RemoteEmbeddingSpec) + **out = **in + } + if in.Local != nil { + in, out := &in.Local, &out.Local + *out = new(LocalEmbeddingSpec) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddingSpec. +func (in *EmbeddingSpec) DeepCopy() *EmbeddingSpec { + if in == nil { + return nil + } + out := new(EmbeddingSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GPUConfig) DeepCopyInto(out *GPUConfig) { *out = *in @@ -124,6 +149,21 @@ func (in *GPUConfig) DeepCopy() *GPUConfig { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferenceServiceSpec) DeepCopyInto(out *InferenceServiceSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceServiceSpec. +func (in *InferenceServiceSpec) DeepCopy() *InferenceServiceSpec { + if in == nil { + return nil + } + out := new(InferenceServiceSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *InferenceSpec) DeepCopyInto(out *InferenceSpec) { *out = *in @@ -156,6 +196,21 @@ func (in *InferenceSpec) DeepCopy() *InferenceSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LocalEmbeddingSpec) DeepCopyInto(out *LocalEmbeddingSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LocalEmbeddingSpec. 
+func (in *LocalEmbeddingSpec) DeepCopy() *LocalEmbeddingSpec { + if in == nil { + return nil + } + out := new(LocalEmbeddingSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PresetMeta) DeepCopyInto(out *PresetMeta) { *out = *in @@ -208,6 +263,141 @@ func (in *PresetSpec) DeepCopy() *PresetSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RAGEngine) DeepCopyInto(out *RAGEngine) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + if in.Spec != nil { + in, out := &in.Spec, &out.Spec + *out = new(RAGEngineSpec) + (*in).DeepCopyInto(*out) + } + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RAGEngine. +func (in *RAGEngine) DeepCopy() *RAGEngine { + if in == nil { + return nil + } + out := new(RAGEngine) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *RAGEngine) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RAGEngineList) DeepCopyInto(out *RAGEngineList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]RAGEngine, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RAGEngineList. 
+func (in *RAGEngineList) DeepCopy() *RAGEngineList { + if in == nil { + return nil + } + out := new(RAGEngineList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *RAGEngineList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RAGEngineSpec) DeepCopyInto(out *RAGEngineSpec) { + *out = *in + if in.Compute != nil { + in, out := &in.Compute, &out.Compute + *out = new(ResourceSpec) + (*in).DeepCopyInto(*out) + } + if in.Storage != nil { + in, out := &in.Storage, &out.Storage + *out = new(StorageSpec) + **out = **in + } + if in.Embedding != nil { + in, out := &in.Embedding, &out.Embedding + *out = new(EmbeddingSpec) + (*in).DeepCopyInto(*out) + } + if in.InferenceService != nil { + in, out := &in.InferenceService, &out.InferenceService + *out = new(InferenceServiceSpec) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RAGEngineSpec. +func (in *RAGEngineSpec) DeepCopy() *RAGEngineSpec { + if in == nil { + return nil + } + out := new(RAGEngineSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RAGEngineStatus) DeepCopyInto(out *RAGEngineStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RAGEngineStatus. 
+func (in *RAGEngineStatus) DeepCopy() *RAGEngineStatus { + if in == nil { + return nil + } + out := new(RAGEngineStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RemoteEmbeddingSpec) DeepCopyInto(out *RemoteEmbeddingSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemoteEmbeddingSpec. +func (in *RemoteEmbeddingSpec) DeepCopy() *RemoteEmbeddingSpec { + if in == nil { + return nil + } + out := new(RemoteEmbeddingSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ResourceSpec) DeepCopyInto(out *ResourceSpec) { *out = *in @@ -238,6 +428,21 @@ func (in *ResourceSpec) DeepCopy() *ResourceSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *StorageSpec) DeepCopyInto(out *StorageSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StorageSpec. +func (in *StorageSpec) DeepCopy() *StorageSpec { + if in == nil { + return nil + } + out := new(StorageSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *TrainingConfig) DeepCopyInto(out *TrainingConfig) { *out = *in diff --git a/config/crd/bases/kaito.sh_ragengines.yaml b/config/crd/bases/kaito.sh_ragengines.yaml new file mode 100644 index 000000000..7b1ec3f55 --- /dev/null +++ b/config/crd/bases/kaito.sh_ragengines.yaml @@ -0,0 +1,269 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.15.0 + name: ragengines.kaito.sh +spec: + group: kaito.sh + names: + categories: + - ragengine + kind: RAGEngine + listKind: RAGEngineList + plural: ragengines + singular: ragengine + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: RAGEngine is the Schema for the ragengine API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + compute: + description: Compute specifies the dedicated GPU resource used by + an embedding model running locally if required. + properties: + count: + default: 1 + description: Count is the required number of GPU nodes. + type: integer + instanceType: + default: Standard_NC12s_v3 + description: |- + InstanceType specifies the GPU node SKU. + This field defaults to "Standard_NC12s_v3" if not specified. 
+ type: string + labelSelector: + description: LabelSelector specifies the required labels for the + GPU nodes. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + preferredNodes: + description: |- + PreferredNodes is an optional node list specified by the user. + If a node in the list does not have the required labels or + the required instanceType, it will be ignored. + items: + type: string + type: array + required: + - labelSelector + type: object + embedding: + description: |- + Embedding specifies whether the RAG engine generates embedding vectors using a remote service + or using a embedding model running locally. 
+ properties: + local: + description: Local specifies how to generate embeddings for index + data using a model run locally. + properties: + image: + description: Image is the name of the containerized embedding + model image. + type: string + imagePullSecret: + type: string + modelAccessSecret: + description: ModelAccessSecret is the name of the secret that + contains the huggingface access token. + type: string + modelID: + description: |- + ModelID is the ID of the embedding model hosted by huggingface. + When this field is specified, the RAG engine will download the embedding model + from huggingface repository during startup. The embedding model will not persist in local storage. + Note that if Image is specified, ModelID should not be specified and vice versa. + type: string + type: object + remote: + description: |- + Remote specifies how to generate embeddings for index data using a remote service. + Note that either Remote or Local needs to be specified, not both. + properties: + accessSecret: + description: AccessSecret is the name of the secret that contains + the service access token. + type: string + url: + description: URL points to a publicly available embedding + service, such as OpenAI. + type: string + required: + - url + type: object + type: object + indexServiceName: + description: |- + IndexServiceName is the name of the service which exposes the endpoint for user to input the index data + to generate embeddings. If not specified, a default service name will be created by the RAG engine. + type: string + inferenceService: + properties: + accessSecret: + description: AccessSecret is the name of the secret that contains + the service access token. + type: string + url: + description: URL points to a running inference service endpoint + which accepts http(s) payload. 
+ type: string + required: + - url + type: object + queryServiceName: + description: |- + QueryServiceName is the name of the service which exposes the endpoint for accepting user queries to the + inference service. If not specified, a default service name will be created by the RAG engine. + type: string + storage: + description: |- + Storage specifies how to access the vector database used to save the embedding vectors. + If this field is not specified, by default, an in-memory vector DB will be used. + The data will not be persisted. + type: object + required: + - embedding + - inferenceService + type: object + status: + description: RAGEngineStatus defines the observed state of RAGEngine + properties: + conditions: + items: + description: "Condition contains details for one aspect of the current + state of this API Resource.\n---\nThis struct is intended for + direct use as an array at the field path .status.conditions. For + example,\n\n\n\ttype FooStatus struct{\n\t // Represents the + observations of a foo's current state.\n\t // Known .status.conditions.type + are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // + +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t + \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" + patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t + \ // other fields\n\t}" + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. 
+ maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. 
+ The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/presets/models/falcon/model.go b/presets/models/falcon/model.go index a94fa81f2..74c39995f 100644 --- a/presets/models/falcon/model.go +++ b/presets/models/falcon/model.go @@ -45,8 +45,8 @@ var ( } baseCommandPresetFalconInference = "accelerate launch" - baseCommandPresetFalconTuning = "python3 metrics_server.py & accelerate launch" - falconRunParams = map[string]string{ + baseCommandPresetFalconTuning = "python3 metrics_server.py & accelerate launch" + falconRunParams = map[string]string{ "torch_dtype": "bfloat16", "pipeline": "text-generation", } diff --git a/presets/models/mistral/model.go b/presets/models/mistral/model.go index ebab6fbe9..b4581d6f1 100644 --- a/presets/models/mistral/model.go +++ b/presets/models/mistral/model.go @@ -32,8 +32,8 @@ var ( } baseCommandPresetMistralInference = "accelerate launch" - baseCommandPresetMistralTuning = "python3 metrics_server.py & accelerate launch" - mistralRunParams = map[string]string{ + baseCommandPresetMistralTuning = "python3 metrics_server.py & accelerate launch" + mistralRunParams = map[string]string{ "torch_dtype": "bfloat16", "pipeline": "text-generation", } diff --git a/presets/models/phi2/model.go b/presets/models/phi2/model.go index 731043f11..07fb8e0d2 100644 --- a/presets/models/phi2/model.go +++ b/presets/models/phi2/model.go @@ -26,8 +26,8 @@ var ( } baseCommandPresetPhiInference = "accelerate launch" - baseCommandPresetPhiTuning = "python3 metrics_server.py & accelerate launch" - phiRunParams = map[string]string{ + baseCommandPresetPhiTuning = "python3 metrics_server.py & 
accelerate launch" + phiRunParams = map[string]string{ "torch_dtype": "float16", "pipeline": "text-generation", } diff --git a/presets/models/phi3/model.go b/presets/models/phi3/model.go index c645b99e5..5656fc15a 100644 --- a/presets/models/phi3/model.go +++ b/presets/models/phi3/model.go @@ -44,8 +44,8 @@ var ( } baseCommandPresetPhiInference = "accelerate launch" - baseCommandPresetPhiTuning = "python3 metrics_server.py & accelerate launch" - phiRunParams = map[string]string{ + baseCommandPresetPhiTuning = "python3 metrics_server.py & accelerate launch" + phiRunParams = map[string]string{ "torch_dtype": "auto", "pipeline": "text-generation", "trust_remote_code": "",