From 60c8a23b43d480cf29834dbe782384b35ec893ea Mon Sep 17 00:00:00 2001 From: SubramanyaV Date: Mon, 30 Mar 2026 14:26:02 +0530 Subject: [PATCH 1/6] Improve the Javadoc for encode/decode methods in coder methods in coder for better clarity on context behaviour --- .../org/apache/beam/sdk/coders/Coder.java | 72 ++++++++++++++----- 1 file changed, 54 insertions(+), 18 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java index 0a3650ca133b..d001f8a93b48 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java @@ -53,6 +53,24 @@ * * @param the type of values being encoded and decoded */ +/** + *

Example usage: + *

{@code
+ * Coder coder = StringUtf8Coder.of();
+ *
+ * // Encoding a single standalone value(typically uses OUTER context)
+ * coder.encode("hello", outStream);
+ *
+ * // Encoding multiple values (NESTED context scenario)
+ * for (String value : values) {
+ *     coder.encode(value, outStream);
+ * }
+ * }
+ * + *

When multiple values are encoded into the same stream, coders must ensure + * that each value can be correctly decoded. This is typically done by encoding + * length or delimiter information. + */ public abstract class Coder implements Serializable { /** * The context in which encoding or decoding is being done. @@ -64,22 +82,32 @@ public abstract class Coder implements Serializable { @Deprecated public static class Context { /** - * The outer context: the value being encoded or decoded takes up the remainder of the - * record/stream contents. - */ + * The outer context indicates that the value being encoded or decoded + * occupies the entire remaining stream. + * + *

In this context, the coder does not need to include length or boundary + * information, since the value extends to the end of the stream. + * + *

Example: Encoding a single standalone value. + */ public static final Context OUTER = new Context(true); - /** - * The nested context: the value being encoded or decoded is (potentially) a part of a larger - * record/stream contents, and may have other parts encoded or decoded after it. - */ + * The nested context indicates that the value being encoded or decoded + * is part of a larger structure or stream containing multiple values. + * + *

In this context, the coder must include enough information (such as + * length or delimiters) to allow correct decoding of individual elements. + * + *

Example: Encoding elements inside a collection or record. + */ public static final Context NESTED = new Context(false); /** - * Whether the encoded or decoded value fills the remainder of the output or input (resp.) - * record/stream contents. If so, then the size of the decoded value can be determined from the - * remaining size of the record/stream contents, and so explicit lengths aren't required. - */ + * Indicates whether the encoded/decoded value consumes the entire remaining stream. + * + *

If true, no additional length information is required. + * If false, the coder must encode boundaries to allow correct decoding. + */ public final boolean isWholeStream; public Context(boolean isWholeStream) { @@ -116,9 +144,11 @@ public String toString() { * be encoded next to each other on the output stream, each coder should encode information to * know how many bytes to read when decoding. A common approach is to prefix the encoding with the * element's encoded length. - * - * @throws IOException if writing to the {@code OutputStream} fails for some reason - * @throws CoderException if the value could not be encoded for some reason + *

The behavior of encoding depends on the {@link Context} in which it is used. + * When using {@link Context#OUTER}, the encoded value may consume the entire remaining stream, + * so no additional length information is required. In contrast, when using {@link Context#NESTED}, + * the encoded value is part of a larger structure, and the coder must include sufficient + * boundary information (such as length prefixes) to allow correct decoding of individual elements. */ public abstract void encode(T value, OutputStream outStream) throws CoderException, IOException; @@ -136,10 +166,16 @@ public void encode(T value, OutputStream outStream, Context context) } /** - * Decodes a value of type {@code T} from the given input stream in the given context. Returns the - * decoded value. Multiple elements can be encoded next to each other on the input stream, each - * coder should encode information to know how many bytes to read when decoding. A common approach - * is to prefix the encoding with the element's encoded length. + * Decodes a value of type {@code T} from the given input stream and returns the decoded value. + * + *

When multiple elements are encoded in the same stream, the coder must be able to determine + * how many bytes to read for each element. This is typically achieved by encoding length or + * delimiter information during encoding. + * + *

The behavior of decoding depends on the {@link Context} in which it is used. + * When decoding in {@link Context#OUTER}, the value is expected to consume the entire remaining + * stream. In {@link Context#NESTED}, the value is part of a larger structure, so the coder must + * rely on encoded boundaries (such as length prefixes) to correctly extract individual elements. * * @throws IOException if reading from the {@code InputStream} fails for some reason * @throws CoderException if the value could not be decoded for some reason From 760905140cba0c9fc9a445fed8232473f1c2b98e Mon Sep 17 00:00:00 2001 From: SubramanyaV Date: Mon, 30 Mar 2026 18:14:27 +0530 Subject: [PATCH 2/6] Apply spotless formatting --- .../org/apache/beam/sdk/coders/Coder.java | 69 ++++++++++--------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java index d001f8a93b48..19a2cd62eb75 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java @@ -54,7 +54,8 @@ * @param the type of values being encoded and decoded */ /** - *

Example usage: + * Example usage: + * *

{@code
  * Coder coder = StringUtf8Coder.of();
  *
@@ -67,9 +68,8 @@
  * }
  * }
* - *

When multiple values are encoded into the same stream, coders must ensure - * that each value can be correctly decoded. This is typically done by encoding - * length or delimiter information. + *

When multiple values are encoded into the same stream, coders must ensure that each value can + * be correctly decoded. This is typically done by encoding length or delimiter information. */ public abstract class Coder implements Serializable { /** @@ -82,32 +82,32 @@ public abstract class Coder implements Serializable { @Deprecated public static class Context { /** - * The outer context indicates that the value being encoded or decoded - * occupies the entire remaining stream. - * - *

In this context, the coder does not need to include length or boundary - * information, since the value extends to the end of the stream. - * - *

Example: Encoding a single standalone value. - */ + * The outer context indicates that the value being encoded or decoded occupies the entire + * remaining stream. + * + *

In this context, the coder does not need to include length or boundary information, since + * the value extends to the end of the stream. + * + *

Example: Encoding a single standalone value. + */ public static final Context OUTER = new Context(true); /** - * The nested context indicates that the value being encoded or decoded - * is part of a larger structure or stream containing multiple values. - * - *

In this context, the coder must include enough information (such as - * length or delimiters) to allow correct decoding of individual elements. - * - *

Example: Encoding elements inside a collection or record. - */ + * The nested context indicates that the value being encoded or decoded is part of a larger + * structure or stream containing multiple values. + * + *

In this context, the coder must include enough information (such as length or delimiters) + * to allow correct decoding of individual elements. + * + *

Example: Encoding elements inside a collection or record. + */ public static final Context NESTED = new Context(false); /** - * Indicates whether the encoded/decoded value consumes the entire remaining stream. - * - *

If true, no additional length information is required. - * If false, the coder must encode boundaries to allow correct decoding. - */ + * Indicates whether the encoded/decoded value consumes the entire remaining stream. + * + *

If true, no additional length information is required. If false, the coder must encode + * boundaries to allow correct decoding. + */ public final boolean isWholeStream; public Context(boolean isWholeStream) { @@ -144,11 +144,12 @@ public String toString() { * be encoded next to each other on the output stream, each coder should encode information to * know how many bytes to read when decoding. A common approach is to prefix the encoding with the * element's encoded length. - *

The behavior of encoding depends on the {@link Context} in which it is used. - * When using {@link Context#OUTER}, the encoded value may consume the entire remaining stream, - * so no additional length information is required. In contrast, when using {@link Context#NESTED}, - * the encoded value is part of a larger structure, and the coder must include sufficient - * boundary information (such as length prefixes) to allow correct decoding of individual elements. + * + *

The behavior of encoding depends on the {@link Context} in which it is used. When using + * {@link Context#OUTER}, the encoded value may consume the entire remaining stream, so no + * additional length information is required. In contrast, when using {@link Context#NESTED}, the + * encoded value is part of a larger structure, and the coder must include sufficient boundary + * information (such as length prefixes) to allow correct decoding of individual elements. */ public abstract void encode(T value, OutputStream outStream) throws CoderException, IOException; @@ -172,10 +173,10 @@ public void encode(T value, OutputStream outStream, Context context) * how many bytes to read for each element. This is typically achieved by encoding length or * delimiter information during encoding. * - *

The behavior of decoding depends on the {@link Context} in which it is used. - * When decoding in {@link Context#OUTER}, the value is expected to consume the entire remaining - * stream. In {@link Context#NESTED}, the value is part of a larger structure, so the coder must - * rely on encoded boundaries (such as length prefixes) to correctly extract individual elements. + *

The behavior of decoding depends on the {@link Context} in which it is used. When decoding + * in {@link Context#OUTER}, the value is expected to consume the entire remaining stream. In + * {@link Context#NESTED}, the value is part of a larger structure, so the coder must rely on + * encoded boundaries (such as length prefixes) to correctly extract individual elements. * * @throws IOException if reading from the {@code InputStream} fails for some reason * @throws CoderException if the value could not be decoded for some reason From 18395a2a3ccc4bf8bac5d4f2e944e328b4152071 Mon Sep 17 00:00:00 2001 From: SubramanyaV Date: Tue, 31 Mar 2026 12:16:58 +0530 Subject: [PATCH 3/6] Fix minimal javadoc wording for coder encode decode --- .../org/apache/beam/sdk/coders/Coder.java | 65 ++++--------------- 1 file changed, 14 insertions(+), 51 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java index 19a2cd62eb75..0a3650ca133b 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java @@ -53,24 +53,6 @@ * * @param the type of values being encoded and decoded */ -/** - * Example usage: - * - *

{@code
- * Coder coder = StringUtf8Coder.of();
- *
- * // Encoding a single standalone value(typically uses OUTER context)
- * coder.encode("hello", outStream);
- *
- * // Encoding multiple values (NESTED context scenario)
- * for (String value : values) {
- *     coder.encode(value, outStream);
- * }
- * }
- * - *

When multiple values are encoded into the same stream, coders must ensure that each value can - * be correctly decoded. This is typically done by encoding length or delimiter information. - */ public abstract class Coder implements Serializable { /** * The context in which encoding or decoding is being done. @@ -82,31 +64,21 @@ public abstract class Coder implements Serializable { @Deprecated public static class Context { /** - * The outer context indicates that the value being encoded or decoded occupies the entire - * remaining stream. - * - *

In this context, the coder does not need to include length or boundary information, since - * the value extends to the end of the stream. - * - *

Example: Encoding a single standalone value. + * The outer context: the value being encoded or decoded takes up the remainder of the + * record/stream contents. */ public static final Context OUTER = new Context(true); + /** - * The nested context indicates that the value being encoded or decoded is part of a larger - * structure or stream containing multiple values. - * - *

In this context, the coder must include enough information (such as length or delimiters) - * to allow correct decoding of individual elements. - * - *

Example: Encoding elements inside a collection or record. + * The nested context: the value being encoded or decoded is (potentially) a part of a larger + * record/stream contents, and may have other parts encoded or decoded after it. */ public static final Context NESTED = new Context(false); /** - * Indicates whether the encoded/decoded value consumes the entire remaining stream. - * - *

If true, no additional length information is required. If false, the coder must encode - * boundaries to allow correct decoding. + * Whether the encoded or decoded value fills the remainder of the output or input (resp.) + * record/stream contents. If so, then the size of the decoded value can be determined from the + * remaining size of the record/stream contents, and so explicit lengths aren't required. */ public final boolean isWholeStream; @@ -145,11 +117,8 @@ public String toString() { * know how many bytes to read when decoding. A common approach is to prefix the encoding with the * element's encoded length. * - *

The behavior of encoding depends on the {@link Context} in which it is used. When using - * {@link Context#OUTER}, the encoded value may consume the entire remaining stream, so no - * additional length information is required. In contrast, when using {@link Context#NESTED}, the - * encoded value is part of a larger structure, and the coder must include sufficient boundary - * information (such as length prefixes) to allow correct decoding of individual elements. + * @throws IOException if writing to the {@code OutputStream} fails for some reason + * @throws CoderException if the value could not be encoded for some reason */ public abstract void encode(T value, OutputStream outStream) throws CoderException, IOException; @@ -167,16 +136,10 @@ public void encode(T value, OutputStream outStream, Context context) } /** - * Decodes a value of type {@code T} from the given input stream and returns the decoded value. - * - *

When multiple elements are encoded in the same stream, the coder must be able to determine - * how many bytes to read for each element. This is typically achieved by encoding length or - * delimiter information during encoding. - * - *

The behavior of decoding depends on the {@link Context} in which it is used. When decoding - * in {@link Context#OUTER}, the value is expected to consume the entire remaining stream. In - * {@link Context#NESTED}, the value is part of a larger structure, so the coder must rely on - * encoded boundaries (such as length prefixes) to correctly extract individual elements. + * Decodes a value of type {@code T} from the given input stream in the given context. Returns the + * decoded value. Multiple elements can be encoded next to each other on the input stream, each + * coder should encode information to know how many bytes to read when decoding. A common approach + * is to prefix the encoding with the element's encoded length. * * @throws IOException if reading from the {@code InputStream} fails for some reason * @throws CoderException if the value could not be decoded for some reason From 88ebf83177f98ddcbd1fe324e01815d3d5f7837c Mon Sep 17 00:00:00 2001 From: SubramanyaV Date: Wed, 1 Apr 2026 15:19:01 +0530 Subject: [PATCH 4/6] Fix serial warning in starter and example archetype --- .../src/main/java/StarterPipeline.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java b/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java index d6afdecf11db..9ddbd2ae1f1c 100644 --- a/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java +++ b/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java @@ -51,15 +51,20 @@ public static void main(String[] args) { PipelineOptionsFactory.fromArgs(args).withValidation().create()); p.apply(Create.of("Hello", "World")) - .apply(MapElements.via(new SimpleFunction() { + .apply(MapElements.via( + new @SuppressWarnings("serial") + SimpleFunction() { @Override public String apply(String input) { return input.toUpperCase(); } })) - .apply(ParDo.of(new DoFn() { + .apply(ParDo.of( + new @SuppressWarnings("serial") + DoFn() { @ProcessElement - public void processElement(ProcessContext c) { + public void + processElement(ProcessContext c) { LOG.info(c.element()); } })); From 7502e6450c160462c26e3ea3891c969b5ce75aab Mon Sep 17 00:00:00 2001 From: SubramanyaV Date: Wed, 1 Apr 2026 21:50:23 +0530 Subject: [PATCH 5/6] Address review: use @Element and fix formatting revised --- .../src/main/java/StarterPipeline.java | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java b/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java index 9ddbd2ae1f1c..1cfcbf2249f8 100644 --- a/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java +++ b/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java @@ -51,21 +51,20 @@ public static void main(String[] args) { PipelineOptionsFactory.fromArgs(args).withValidation().create()); p.apply(Create.of("Hello", "World")) - .apply(MapElements.via( - new @SuppressWarnings("serial") - SimpleFunction() { - @Override - public String apply(String input) { - return input.toUpperCase(); - } - })) - .apply(ParDo.of( - new @SuppressWarnings("serial") - DoFn() { + .apply( + MapElements.via( + new SimpleFunction() { + @Override + public String apply(String input) { + return input.toUpperCase(); + } + })) + .apply( + ParDo.of( + new DoFn() { @ProcessElement - public void - processElement(ProcessContext c) { - LOG.info(c.element()); + public void processElement(@Element String element) { + LOG.info(element); } })); From b90cfb5ada50bb85bbe99feb71765504c1edd533 Mon Sep 17 00:00:00 2001 From: SubramanyaV Date: Wed, 1 Apr 2026 22:54:57 +0530 Subject: [PATCH 6/6] added import for the starterpipeline --- .../archetype-resources/src/main/java/StarterPipeline.java | 1 + 1 file changed, 1 insertion(+) diff --git a/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java b/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java index 1cfcbf2249f8..e9ee93fd3da6 100644 --- a/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java +++ b/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java @@ -26,6 +26,7 @@ import org.apache.beam.sdk.transforms.SimpleFunction; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.beam.sdk.transforms.DoFn.Element; /** * A starter example for writing Beam programs.