DistributedAI_Go is a Go library and framework for building and managing distributed AI inference services. It leverages gRPC for high-performance inter-service communication and is designed to be easily deployable on container orchestration platforms like Kubernetes. This project aims to provide a robust and scalable solution for serving machine learning models in a distributed, microservices-oriented architecture.
- gRPC-based Communication: Efficient and language-agnostic communication between AI services.
- Scalable Inference: Design patterns for horizontally scaling AI model serving.
- Kubernetes Integration: Examples and configurations for deploying distributed AI services on Kubernetes.
- Model Agnostic: Can serve models from various frameworks (e.g., TensorFlow, PyTorch) via a common interface.
- Health Checks & Monitoring: Built-in mechanisms for service health and performance monitoring.
- Load Balancing: Strategies for distributing inference requests across multiple service instances.
To get started with DistributedAI_Go, clone the repository and build the project:
git clone https://github.com/Eation5/DistributedAI_Go.git
cd DistributedAI_Go
go mod tidy
go build ./...

Here's a simplified example of a gRPC server and client for an AI inference service:
syntax = "proto3";
package inference;
option go_package = "./inference";
// InferenceRequest is the input message of InferenceService.Predict.
message InferenceRequest {
// input_data is the list of 32-bit float values sent for inference (field number 1).
repeated float input_data = 1;
}
// InferenceResponse is the output message of InferenceService.Predict.
message InferenceResponse {
// output_data is the list of 32-bit float values produced by the service (field number 1).
repeated float output_data = 1;
}
// InferenceService is the gRPC service through which clients request inference.
service InferenceService {
  // Predict is a unary call: one InferenceRequest in, one InferenceResponse out.
  rpc Predict(InferenceRequest) returns (InferenceResponse);
}

package main
import (
"context"
"log"
"net"
"google.golang.org/grpc"
pb "DistributedAI_Go/inference"
)
// server is the InferenceService implementation that main registers with the
// gRPC server; the Predict handler is defined on it.
type server struct {
// Embedded generated base type.
// NOTE(review): presumably supplies default handlers for any RPCs not
// implemented on server — confirm against the generated code.
pb.UnimplementedInferenceServiceServer
}
// Predict handles one InferenceService.Predict call.
//
// It logs the number of input values received and returns a response whose
// OutputData has the same length as in.InputData, with every element equal to
// the corresponding input multiplied by two. The returned error is always nil.
func (s *server) Predict(ctx context.Context, in *pb.InferenceRequest) (*pb.InferenceResponse, error) {
	inputs := in.InputData
	log.Printf("Received inference request with %d data points", len(inputs))

	// Stand-in for a real model: doubling each value keeps the example
	// self-contained while still producing an input-dependent result.
	doubled := make([]float32, len(inputs))
	for idx := range inputs {
		doubled[idx] = 2.0 * inputs[idx]
	}

	resp := &pb.InferenceResponse{OutputData: doubled}
	return resp, nil
}
func main() {
lis, err := net.Listen("tcp", ":50051")
if err != nil {
log.Fatalf("failed to listen: %v", err)
}
s := grpc.NewServer()
pb.RegisterInferenceServiceServer(s, &server{})
log.Printf("server listening at %v", lis.Addr())
if err := s.Serve(lis); err != nil {
log.Fatalf("failed to serve: %v", err)
}
}package main
import (
"context"
"log"
"time"
"google.golang.org/grpc"
pb "DistributedAI_Go/inference"
)
func main() {
conn, err := grpc.Dial("localhost:50051", grpc.WithInsecure(), grpc.WithBlock())
if err != nil {
log.Fatalf("did not connect: %v", err)
}
defer conn.Close()
c := pb.NewInferenceServiceClient(conn)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
// Prepare dummy input data
inputData := []float32{1.0, 2.0, 3.0, 4.0, 5.0}
req := &pb.InferenceRequest{InputData: inputData}
r, err := c.Predict(ctx, req)
if err != nil {
log.Fatalf("could not predict: %v", err)
}
log.Printf("Inference Result: %v", r.OutputData)
}DistributedAI_Go/
├── README.md
├── go.mod
├── go.sum
├── inference/
│   ├── inference.proto
│   ├── inference_grpc.pb.go
│   └── inference.pb.go
├── server/
│   └── main.go
└── client/
    └── main.go
We welcome contributions! Please see CONTRIBUTING.md for details on how to get started.
This project is licensed under the MIT License - see the LICENSE file for details.
For any inquiries, please open an issue on GitHub or contact Matthew Wilson at matthew.wilson.ai@example.com.