001/* 002 * SPDX-License-Identifier: Apache-2.0 003 * 004 * Copyright 2025-2026 The Enola <https://enola.dev> Authors 005 * 006 * Licensed under the Apache License, Version 2.0 (the "License"); 007 * you may not use this file except in compliance with the License. 008 * You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package dev.enola.common.protobuf.schema; 019 020import com.google.common.collect.ImmutableMap; 021import com.google.genai.types.Schema; 022import com.google.genai.types.Type.Known; 023import com.google.protobuf.Descriptors.Descriptor; 024import com.google.protobuf.Descriptors.FieldDescriptor; 025import com.google.protobuf.Message; 026 027import org.jspecify.annotations.Nullable; 028 029/** 030 * Conversion from <a href="https://protobuf.dev">Protocol Buffers</a> (model, not instances) to <a 031 * href="https://googleapis.github.io/js-genai/release_docs/interfaces/types.Schema.html">Google 032 * GenAI Schema</a> (which is based on <a 033 * href="https://spec.openapis.org/oas/v3.0.3#schema-object">OpenAPI 3.0 schema</a>, which in turn 034 * is very much like <a href="https://json-schema.org">JSON Schema</a>). 035 * 036 * <p>The Schema matches how a Proto message (instance, not model) would be serialized in <a 037 * href="https://protobuf.dev/programming-guides/json/">ProtoJSON format</a>. 038 * 039 * <p>Other than just being 💃 fun, this is (very) useful to be able to 🧑🏽🍼 feed 🥄 Language 040 * Models, such as Google 🔮 Gemini, 🥢 Protos instead of only 📜 text, for 🧰 tools (whether 041 * through MCP 🔱 or not), or to configure their <a 042 * href="https://github.com/googleapis/java-genai#generate-content-with-json-response-schema">expected 043 * structured output</a>. 044 * 045 * @author <a href="http://www.vorburger.ch">Michael Vorburger.ch</a> 046 */ 047public class MessageDescriptorToSchemaConverter { 048 049 // TODO Research and compare with any existing prior art, if any? 050 // TODO Announce this to ProtoBuf group 051 052 public Schema convert(Message message) { 053 return convert(message.getDescriptorForType()); 054 } 055 056 public Schema convert(Descriptor descriptor) { 057 var schema = Schema.builder(); 058 schema.type(Known.OBJECT); 059 schema.title(descriptor.getFullName()); 060 061 var properties = ImmutableMap.<String, Schema>builder(); 062 for (var field : descriptor.getFields()) { 063 var fieldSchema = Schema.builder(); 064 var name = field.getJsonName(); 065 var type = field.getType(); 066 fieldSchema.type(type(type)); 067 var format = format(field.getType()); 068 if (format != null) fieldSchema.format(format); 069 properties.put(name, fieldSchema.build()); 070 } 071 schema.properties(properties.build()); 072 073 // TODO schema.propertyOrdering() 074 075 return schema.build(); 076 } 077 078 private Known type(FieldDescriptor.Type type) { 079 // TODO Handle Timestamp specially 080 // https://protobuf.dev/programming-guides/proto3/#scalar 081 // https://spec.openapis.org/oas/v3.0.3#data-types 082 return switch (type) { 083 case STRING, BYTES -> Known.STRING; 084 case BOOL -> Known.BOOLEAN; 085 case DOUBLE, FLOAT -> Known.NUMBER; 086 087 case INT32, 088 INT64, 089 UINT32, 090 UINT64, 091 SINT32, 092 SINT64, 093 FIXED32, 094 FIXED64, 095 SFIXED32, 096 SFIXED64 -> 097 Known.INTEGER; 098 099 case MESSAGE -> Known.OBJECT; 100 // TODO Handle ENUM 101 // TODO Handle GROUP 102 case ENUM, GROUP -> throw new UnsupportedOperationException(type.name()); 103 }; 104 } 105 106 private @Nullable String format(FieldDescriptor.Type type) { 107 // TODO Handle Timestamp specially and return "date-time" 108 return switch (type) { 109 case DOUBLE -> "double"; 110 case FLOAT -> "float"; 111 case INT64, UINT64, FIXED64, SFIXED64, SINT64 -> "int64"; 112 case INT32, UINT32, FIXED32, SFIXED32, SINT32 -> "int32"; 113 // BYTES is "byte" (base64), NOT "binary" ('any sequence of octets') 114 case BYTES -> "byte"; 115 default -> null; 116 }; 117 } 118}