001/*
002 * SPDX-License-Identifier: Apache-2.0
003 *
004 * Copyright 2025-2026 The Enola <https://enola.dev> Authors
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 *     https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package dev.enola.common.protobuf.schema;
019
020import com.google.common.collect.ImmutableMap;
021import com.google.genai.types.Schema;
022import com.google.genai.types.Type.Known;
023import com.google.protobuf.Descriptors.Descriptor;
024import com.google.protobuf.Descriptors.FieldDescriptor;
025import com.google.protobuf.Message;
026
027import org.jspecify.annotations.Nullable;
028
029/**
030 * Conversion from <a href="https://protobuf.dev">Protocol Buffers</a> (model, not instances) to <a
031 * href="https://googleapis.github.io/js-genai/release_docs/interfaces/types.Schema.html">Google
032 * GenAI Schema</a> (which is based on <a
033 * href="https://spec.openapis.org/oas/v3.0.3#schema-object">OpenAPI 3.0 schema</a>, which in turn
034 * is very much like <a href="https://json-schema.org">JSON Schema</a>).
035 *
036 * <p>The Schema matches how a Proto message (instance, not model) would be serialized in <a
037 * href="https://protobuf.dev/programming-guides/json/">ProtoJSON format</a>.
038 *
039 * <p>Other than just being 💃 fun, this is (very) useful to be able to 🧑🏽‍🍼 feed 🥄 Language
040 * Models, such as Google 🔮 Gemini, 🥢 Protos instead of only 📜 text, for 🧰 tools (whether
041 * through MCP 🔱 or not), or to configure their <a
042 * href="https://github.com/googleapis/java-genai#generate-content-with-json-response-schema">expected
043 * structured output</a>.
044 *
045 * @author <a href="http://www.vorburger.ch">Michael Vorburger.ch</a>
046 */
047public class MessageDescriptorToSchemaConverter {
048
049    // TODO Research and compare with any existing prior art, if any?
050    // TODO Announce this to ProtoBuf group
051
052    public Schema convert(Message message) {
053        return convert(message.getDescriptorForType());
054    }
055
056    public Schema convert(Descriptor descriptor) {
057        var schema = Schema.builder();
058        schema.type(Known.OBJECT);
059        schema.title(descriptor.getFullName());
060
061        var properties = ImmutableMap.<String, Schema>builder();
062        for (var field : descriptor.getFields()) {
063            var fieldSchema = Schema.builder();
064            var name = field.getJsonName();
065            var type = field.getType();
066            fieldSchema.type(type(type));
067            var format = format(field.getType());
068            if (format != null) fieldSchema.format(format);
069            properties.put(name, fieldSchema.build());
070        }
071        schema.properties(properties.build());
072
073        // TODO schema.propertyOrdering()
074
075        return schema.build();
076    }
077
078    private Known type(FieldDescriptor.Type type) {
079        // TODO Handle Timestamp specially
080        // https://protobuf.dev/programming-guides/proto3/#scalar
081        // https://spec.openapis.org/oas/v3.0.3#data-types
082        return switch (type) {
083            case STRING, BYTES -> Known.STRING;
084            case BOOL -> Known.BOOLEAN;
085            case DOUBLE, FLOAT -> Known.NUMBER;
086
087            case INT32,
088                    INT64,
089                    UINT32,
090                    UINT64,
091                    SINT32,
092                    SINT64,
093                    FIXED32,
094                    FIXED64,
095                    SFIXED32,
096                    SFIXED64 ->
097                    Known.INTEGER;
098
099            case MESSAGE -> Known.OBJECT;
100            // TODO Handle ENUM
101            // TODO Handle GROUP
102            case ENUM, GROUP -> throw new UnsupportedOperationException(type.name());
103        };
104    }
105
106    private @Nullable String format(FieldDescriptor.Type type) {
107        // TODO Handle Timestamp specially and return "date-time"
108        return switch (type) {
109            case DOUBLE -> "double";
110            case FLOAT -> "float";
111            case INT64, UINT64, FIXED64, SFIXED64, SINT64 -> "int64";
112            case INT32, UINT32, FIXED32, SFIXED32, SINT32 -> "int32";
113            // BYTES is "byte" (base64), NOT "binary" ('any sequence of octets')
114            case BYTES -> "byte";
115            default -> null;
116        };
117    }
118}