Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,11 @@ public List<DataType> visit(ArrayType arrayType) {
return Collections.singletonList(arrayType.getElementType());
}

/** Returns a one-element list holding the element type of the given vector type. */
@Override
public List<DataType> visit(VectorType vectorType) {
    final DataType elementType = vectorType.getElementType();
    return Collections.singletonList(elementType);
}

@Override
public List<DataType> visit(MultisetType multisetType) {
return Collections.singletonList(multisetType.getElementType());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ public R visit(ArrayType arrayType) {
return defaultMethod(arrayType);
}

// Vector types get no dedicated handling in this visitor: the call is forwarded to
// defaultMethod, exactly like the neighbouring ArrayType and MultisetType overloads.
@Override
public R visit(VectorType vectorType) {
return defaultMethod(vectorType);
}

@Override
public R visit(MultisetType multisetType) {
return defaultMethod(multisetType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ public static DataType parseDataType(JsonNode json, AtomicInteger fieldId) {
if (typeString.startsWith("ARRAY")) {
DataType element = parseDataType(json.get("element"), fieldId);
return new ArrayType(!typeString.contains("NOT NULL"), element);
} else if (typeString.startsWith("VECTOR")) {
DataType element = parseDataType(json.get("element"), fieldId);
int length = json.get("length").asInt();
return new VectorType(!typeString.contains("NOT NULL"), length, element);
} else if (typeString.startsWith("MULTISET")) {
DataType element = parseDataType(json.get("element"), fieldId);
return new MultisetType(!typeString.contains("NOT NULL"), element);
Expand Down Expand Up @@ -318,6 +322,7 @@ private enum Keyword {
SECOND,
TO,
ARRAY,
VECTOR,
MULTISET,
MAP,
ROW,
Expand Down Expand Up @@ -544,6 +549,8 @@ private DataType parseTypeByKeyword() {
return new VariantType();
case BLOB:
return new BlobType();
case VECTOR:
return parseVectorType();
default:
throw parsingError("Unsupported type: " + token().value);
}
Expand Down Expand Up @@ -665,5 +672,16 @@ private int parseOptionalPrecision(int defaultPrecision) {
}
return precision;
}

/**
 * Parses the parameter list of a vector type, i.e. the {@code <elementType, length>} part
 * that follows the {@code VECTOR} keyword.
 *
 * <p>The returned type is nullable. This mirrors the textual form produced by
 * {@code VectorType#asSQLString()}, where a nullable vector carries no suffix and only a
 * non-nullable one is followed by {@code NOT NULL}, and it matches the JSON object form, where
 * a type string without {@code NOT NULL} is read as nullable.
 *
 * @return the parsed vector type; its element type is always NOT NULL
 */
private DataType parseVectorType() {
    // VECTOR<elementType, length>
    nextToken(TokenType.BEGIN_SUBTYPE);
    DataType elementType = parseTypeWithNullability();
    nextToken(TokenType.LIST_SEPARATOR);
    nextToken(TokenType.LITERAL_INT);
    int length = tokenAsInt();
    nextToken(TokenType.END_SUBTYPE);
    // DataTypes.VECTOR normalizes the element to NOT NULL but also builds a NOT NULL vector.
    // A bare "VECTOR<T, n>" is how a *nullable* vector is printed, so switch the outer type
    // back to nullable to keep parsing and printing symmetric.
    // NOTE(review): an explicit NOT NULL / NULL suffix is presumably applied by the caller
    // after this method returns -- confirm against parseTypeWithNullability.
    return DataTypes.VECTOR(length, elementType).copy(true);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ public enum DataTypeRoot {

ARRAY(DataTypeFamily.CONSTRUCTED, DataTypeFamily.COLLECTION),

VECTOR(DataTypeFamily.CONSTRUCTED, DataTypeFamily.COLLECTION),

MULTISET(DataTypeFamily.CONSTRUCTED, DataTypeFamily.COLLECTION),

MAP(DataTypeFamily.CONSTRUCTED, DataTypeFamily.EXTENSION),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ public interface DataTypeVisitor<R> {

R visit(ArrayType arrayType);

R visit(VectorType vectorType);

R visit(MultisetType multisetType);

R visit(MapType mapType);
Expand Down
13 changes: 13 additions & 0 deletions paimon-api/src/main/java/org/apache/paimon/types/DataTypes.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,14 @@ public static ArrayType ARRAY(DataType element) {
return new ArrayType(element);
}

/**
 * Creates a {@link VectorType} with the given length and element type.
 *
 * @param length number of elements in the vector
 * @param element element type; may be nullable, in which case its NOT NULL variant is used
 * @return the vector type
 */
public static VectorType VECTOR(int length, DataType element) {
    // Vector elements must currently be NOT NULL, whereas most other types default to
    // nullable. To keep call sites concise, a nullable element is accepted here and swapped
    // for its NOT NULL variant; an element that is already NOT NULL is passed through as-is.
    final DataType nonNullElement;
    if (element.isNullable()) {
        nonNullElement = element.notNull();
    } else {
        nonNullElement = element;
    }
    return new VectorType(length, nonNullElement);
}

/**
 * Creates a {@link CharType} with the given length.
 *
 * @param length length passed to the {@link CharType} constructor
 * @return the new char type
 */
public static CharType CHAR(int length) {
return new CharType(length);
}
Expand Down Expand Up @@ -221,6 +229,11 @@ public OptionalInt visit(VarBinaryType varBinaryType) {
return OptionalInt.of(varBinaryType.getLength());
}

/** Reports the declared length of the given vector type. */
@Override
public OptionalInt visit(VectorType vectorType) {
    final int length = vectorType.getLength();
    return OptionalInt.of(length);
}

@Override
protected OptionalInt defaultMethod(DataType dataType) {
return OptionalInt.empty();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ public DataType visit(ArrayType arrayType) {
return new ArrayType(arrayType.isNullable(), arrayType.getElementType().accept(this));
}

/**
 * Rebuilds the vector type with the same nullability and length, after letting this visitor
 * process the element type.
 */
@Override
public DataType visit(VectorType vectorType) {
    final boolean nullable = vectorType.isNullable();
    final int length = vectorType.getLength();
    final DataType visitedElement = vectorType.getElementType().accept(this);
    return new VectorType(nullable, length, visitedElement);
}

@Override
public DataType visit(MultisetType multisetType) {
return new MultisetType(
Expand Down
181 changes: 181 additions & 0 deletions paimon-api/src/main/java/org/apache/paimon/types/VectorType.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.types;

import org.apache.paimon.annotation.Public;
import org.apache.paimon.utils.Preconditions;

import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.core.JsonGenerator;

import java.io.IOException;
import java.util.Objects;
import java.util.Set;

/**
 * Data type of fixed-size vector type. The elements are densely stored.
 *
 * <p>A vector has a fixed {@code length} of at least {@link #MIN_LENGTH} and an element type
 * that is NOT NULL and accepted by {@link #isValidElementType(DataType)}.
 *
 * @since 2.0.0
 */
@Public
public class VectorType extends DataType {

    private static final long serialVersionUID = 1L;

    /** Smallest permitted vector length (inclusive). */
    public static final int MIN_LENGTH = 1;

    /** Largest permitted vector length (inclusive). */
    public static final int MAX_LENGTH = Integer.MAX_VALUE;

    /** SQL string template: element type first, then the length. */
    public static final String FORMAT = "VECTOR<%s, %d>";

    private final DataType elementType;

    private final int length;

    /**
     * Creates a vector type.
     *
     * @param isNullable whether the vector value itself may be null
     * @param length number of elements, at least {@link #MIN_LENGTH}
     * @param elementType element type; must be non-null, NOT NULL and one of the roots accepted
     *     by {@link #isValidElementType(DataType)}
     * @throws IllegalArgumentException if {@code length} is smaller than {@link #MIN_LENGTH};
     *     the element type preconditions are enforced through {@code Preconditions}
     */
    public VectorType(boolean isNullable, int length, DataType elementType) {
        super(isNullable, DataTypeRoot.VECTOR);
        this.elementType =
                Preconditions.checkNotNull(elementType, "Element type must not be null.");
        Preconditions.checkArgument(
                isValidElementType(elementType), "Invalid element type for vector: " + elementType);
        // Currently we do not support nullable elements.
        Preconditions.checkArgument(
                !elementType.isNullable(), "Element type must be nonNull for vector.");
        // Only the lower bound needs a check: MAX_LENGTH equals Integer.MAX_VALUE, so an int
        // can never exceed it.
        if (length < MIN_LENGTH) {
            throw new IllegalArgumentException(
                    String.format(
                            "Vector length must be between %d and %d (both inclusive).",
                            MIN_LENGTH, MAX_LENGTH));
        }
        this.length = length;
    }

    /**
     * Creates a NOT NULL vector type.
     *
     * @param length number of elements, at least {@link #MIN_LENGTH}
     * @param elementType NOT NULL element type
     */
    public VectorType(int length, DataType elementType) {
        this(false, length, elementType); // For vector type we prefer NOT NULL
    }

    /** Returns the fixed number of elements of this vector type. */
    public int getLength() {
        return length;
    }

    /** Returns the element type of this vector type. */
    public DataType getElementType() {
        return elementType;
    }

    /**
     * Tells whether the given type may be used as a vector element.
     *
     * @param elementType candidate element type
     * @return {@code true} for BOOLEAN, TINYINT, SMALLINT, INTEGER, BIGINT, FLOAT and DOUBLE
     *     roots, {@code false} for every other root
     */
    public static boolean isValidElementType(DataType elementType) {
        switch (elementType.getTypeRoot()) {
            case BOOLEAN:
            case TINYINT:
            case SMALLINT:
            case INTEGER:
            case BIGINT:
            case FLOAT:
            case DOUBLE:
                return true;
            default:
                return false;
        }
    }

    /**
     * Returns the element's default size multiplied by the length, capped at {@link
     * Integer#MAX_VALUE}.
     */
    @Override
    public int defaultSize() {
        // The length may be as large as Integer.MAX_VALUE, so the product is computed in 64 bits
        // and saturated; a plain int multiplication could silently wrap around.
        long totalSize = (long) elementType.defaultSize() * length;
        return (int) Math.min(totalSize, Integer.MAX_VALUE);
    }

    /** Returns a vector type with the same length, a copied element type and the given nullability. */
    @Override
    public DataType copy(boolean isNullable) {
        return new VectorType(isNullable, length, elementType.copy());
    }

    /** Renders this type using {@link #FORMAT}, with nullability handled by the superclass helper. */
    @Override
    public String asSQLString() {
        return withNullability(FORMAT, elementType.asSQLString(), length);
    }

    /**
     * Writes this type as a JSON object with the fields {@code type}, {@code element} and {@code
     * length}, in that order.
     *
     * @param generator target JSON generator
     * @throws IOException if the generator fails to write
     */
    @Override
    public void serializeJson(JsonGenerator generator) throws IOException {
        generator.writeStartObject();
        generator.writeStringField("type", isNullable() ? "VECTOR" : "VECTOR NOT NULL");
        generator.writeFieldName("element");
        elementType.serializeJson(generator);
        generator.writeFieldName("length");
        generator.writeNumber(length);
        generator.writeEndObject();
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        if (!super.equals(o)) {
            return false;
        }
        VectorType vectorType = (VectorType) o;
        return elementType.equals(vectorType.elementType) && length == vectorType.length;
    }

    /** Same as {@link #equals(Object)}, except that element types are compared ignoring field ids. */
    @Override
    public boolean equalsIgnoreFieldId(DataType o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        if (!super.equals(o)) {
            return false;
        }
        VectorType vectorType = (VectorType) o;
        return elementType.equalsIgnoreFieldId(vectorType.elementType)
                && length == vectorType.length;
    }

    /**
     * Checks whether this type is a pruned form of {@code o}: both must be vector types that
     * agree per the superclass comparison and on length, and the element type must be pruned
     * from the other element type.
     */
    @Override
    public boolean isPrunedFrom(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        if (!super.equals(o)) {
            return false;
        }
        VectorType vectorType = (VectorType) o;
        // The length is part of the type identity (see equals), so a vector with a different
        // length is a different type rather than a pruned variant.
        return length == vectorType.length && elementType.isPrunedFrom(vectorType.elementType);
    }

    @Override
    public int hashCode() {
        return Objects.hash(super.hashCode(), elementType, length);
    }

    @Override
    public <R> R accept(DataTypeVisitor<R> visitor) {
        return visitor.visit(this);
    }

    /** Delegates field id collection to the element type. */
    @Override
    public void collectFieldIds(Set<Integer> fieldIds) {
        elementType.collectFieldIds(fieldIds);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.apache.paimon.types.VarBinaryType;
import org.apache.paimon.types.VarCharType;
import org.apache.paimon.types.VariantType;
import org.apache.paimon.types.VectorType;

import org.apache.arrow.vector.types.TimeUnit;
import org.apache.arrow.vector.types.Types;
Expand Down Expand Up @@ -179,6 +180,12 @@ public FieldType visit(ArrayType arrayType) {
return new FieldType(arrayType.isNullable(), Types.MinorType.LIST.getType(), null);
}

/**
 * Maps a vector type to an Arrow fixed-size list whose list size is the vector length. The
 * field keeps the vector's nullability and is created without a dictionary encoding.
 */
@Override
public FieldType visit(VectorType vectorType) {
    return new FieldType(
            vectorType.isNullable(),
            new ArrowType.FixedSizeList(vectorType.getLength()),
            null);
}

@Override
public FieldType visit(MultisetType multisetType) {
throw new UnsupportedOperationException("Doesn't support MultisetType.");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
import org.apache.paimon.types.VarBinaryType;
import org.apache.paimon.types.VarCharType;
import org.apache.paimon.types.VariantType;
import org.apache.paimon.types.VectorType;

import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVector;
Expand Down Expand Up @@ -482,6 +483,11 @@ public ColumnVector getColumnVector() {
};
}

// No Arrow-to-Paimon converter exists for VectorType here; calling this always throws
// UnsupportedOperationException.
@Override
public Arrow2PaimonVectorConverter visit(VectorType vectorType) {
throw new UnsupportedOperationException("Doesn't support VectorType.");
}

@Override
public Arrow2PaimonVectorConverter visit(MultisetType multisetType) {
throw new UnsupportedOperationException("Doesn't support MultisetType.");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.apache.paimon.types.VarBinaryType;
import org.apache.paimon.types.VarCharType;
import org.apache.paimon.types.VariantType;
import org.apache.paimon.types.VectorType;

import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.complex.ListVector;
Expand Down Expand Up @@ -165,6 +166,11 @@ public ArrowFieldWriterFactory visit(ArrayType arrayType) {
isNullable);
}

// No Arrow field writer factory exists for VectorType here; calling this always throws
// UnsupportedOperationException.
@Override
public ArrowFieldWriterFactory visit(VectorType vectorType) {
throw new UnsupportedOperationException("Doesn't support VectorType.");
}

@Override
public ArrowFieldWriterFactory visit(MultisetType multisetType) {
throw new UnsupportedOperationException("Doesn't support MultisetType.");
Expand Down
Loading