001/* 002 * SPDX-License-Identifier: Apache-2.0 003 * 004 * Copyright 2024-2026 The Enola <https://enola.dev> Authors 005 * 006 * Licensed under the Apache License, Version 2.0 (the "License"); 007 * you may not use this file except in compliance with the License. 008 * You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package dev.enola.model.enola.mediatype; 019 020import com.google.common.base.Strings; 021 022import dev.enola.common.convert.ConversionException; 023import dev.enola.common.io.mediatype.MediaTypes; 024import dev.enola.format.tika.TikaMediaTypes; 025import dev.enola.thing.io.UriIntoThingConverter; 026import dev.enola.thing.repo.ThingRepositoryStore; 027 028import org.apache.tika.mime.MimeTypeException; 029import org.apache.tika.mime.MimeTypes; 030import org.apache.tika.parser.AutoDetectParser; 031import org.slf4j.Logger; 032import org.slf4j.LoggerFactory; 033 034import java.io.IOException; 035import java.net.URI; 036 037/** 038 * Converts Tika Media Types into Enola Things (AKA RDF / TTL). 039 * 040 * <p>Tika reads these from its (builtin) <a 041 * href="https://github.com/apache/tika/blob/116edb30dc5fd26770216ccffcf873f4952a5c2a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml">MIME 042 * Types XML</a> file which corresponds to the <a 043 * href="https://freedesktop.org/wiki/Specifications/shared-mime-info-spec/">freedesktop.org MIME 044 * Info Spec</a>. 045 */ 046public class TikaMediaTypesThingConverter implements UriIntoThingConverter { 047 048 // TODO Make https://enola.dev/fileExtensions be links, not text? 049 050 private static final Logger LOG = LoggerFactory.getLogger(TikaMediaTypesThingConverter.class); 051 052 public static final URI IRI = URI.create("enola:TikaMediaTypes"); 053 054 @Override 055 public boolean convertInto(URI from, ThingRepositoryStore into) 056 throws ConversionException, IOException { 057 if (!IRI.equals(from)) return false; 058 059 // NB: Similar code in TikaMediaTypeProvider 060 var tikaMimeTypes = MimeTypes.getDefaultMimeTypes(); 061 var tikaMediaTypeRegistry = new AutoDetectParser().getMediaTypeRegistry(); 062 var tikaMediaTypes = tikaMediaTypeRegistry.getTypes(); 063 for (var tikaMediaType : tikaMediaTypes) { 064 var mediaTypeName = tikaMediaType.toString(); 065 try { 066 var tikaMimeType = tikaMimeTypes.getRegisteredMimeType(mediaTypeName); 067 var iri = toIRI(tikaMediaType); 068 MediaType.Builder thing = 069 into.getBuilder(iri, MediaType.class, MediaType.Builder.class); 070 thing.addType("https://enola.dev/MediaType"); 071 thing.mediaType(tikaMimeType.getName()); 072 073 var label = tikaMimeType.getAcronym(); 074 if (!Strings.isNullOrEmpty(label)) thing.label(label); 075 else thing.label(tikaMimeType.getName()); 076 077 thing.comment(tikaMimeType.getDescription()); 078 thing.addAllFileExtensions(tikaMimeType.getExtensions()); 079 thing.addAllSeeAlso( 080 tikaMimeType.getLinks().stream().map(uri -> uri.toString()).toList()); 081 082 // TODO var uniformTypeIdentifier = tikaMimeType.getUniformTypeIdentifier(); 083 // TODO var hasMagic = tikaMimeType.hasMagic(); 084 085 // TODO Tika hard-codes :( a few special cases, and doesn't e.g. do +json... 086 var superType = tikaMediaTypeRegistry.getSupertype(tikaMediaType); 087 if (superType != null) thing.parentIRI(toIRI(superType)); 088 089 // tikaMediaType.getBaseType() is a superset of getSupertype() 090 091 // TODO Making the following a 1 liner... 092 // TODO Remove this once children are automagically set by generic Inference!! 093 // TODO Uncomment, once GraphvizGenerator more nicely coalesces parent & children 094 /* 095 var children = tikaMediaTypeRegistry.getChildTypes(tikaMediaType); 096 if (!children.isEmpty()) { 097 var childrenIRI = ImmutableSet.<Link>builderWithExpectedSize(children.size()); 098 for (var child : children) { 099 childrenIRI.add(new Link(toIRI(child))); 100 } 101 thing.childrenIRI(childrenIRI.build()); 102 } 103 */ 104 105 into.store(thing.build()); 106 107 } catch (MimeTypeException e) { 108 LOG.warn("MediaType not found: {}", mediaTypeName, e); 109 } 110 } 111 return true; 112 } 113 114 private String toIRI(org.apache.tika.mime.MediaType tikaMediaType) { 115 return MediaTypes.toIRI(TikaMediaTypes.toGuava(tikaMediaType)); 116 } 117}