本文主要研究一下flink的CheckpointedFunction
/**
 * Example sink that buffers incoming elements and emits them in batches once the buffer
 * holds at least {@code threshold} elements.
 *
 * <p>The buffer is copied into operator list state on every snapshot and is refilled from
 * that state in {@link #initializeState(FunctionInitializationContext)} when the context
 * reports that state was restored.
 */
public class BufferingSink
        implements SinkFunction<Tuple2<String, Integer>>,
                   CheckpointedFunction {

    /** Number of buffered elements that triggers a flush. */
    private final int threshold;

    /** Operator list state handle; obtained in {@code initializeState}, hence transient. */
    private transient ListState<Tuple2<String, Integer>> checkpointedState;

    /** Elements received but not yet sent to the sink. */
    private List<Tuple2<String, Integer>> bufferedElements;

    /**
     * Creates a sink with an empty buffer.
     *
     * @param threshold number of buffered elements at which the buffer is flushed
     */
    public BufferingSink(int threshold) {
        this.threshold = threshold;
        this.bufferedElements = new ArrayList<>();
    }

    /**
     * Buffers {@code value} and flushes the whole buffer once it has reached the threshold.
     *
     * @param value the element to buffer
     * @throws Exception declared by the sink contract; nothing in this body throws explicitly
     */
    @Override
    public void invoke(Tuple2<String, Integer> value) throws Exception {
        bufferedElements.add(value);
        // Use >= rather than ==: initializeState appends restored elements to the buffer,
        // so the buffer may already hold `threshold` or more elements before the first
        // invoke. With == the size would skip past the threshold and never flush again.
        if (bufferedElements.size() >= threshold) {
            for (Tuple2<String, Integer> element : bufferedElements) {
                // send it to the sink
            }
            bufferedElements.clear();
        }
    }

    /**
     * Replaces the contents of the checkpointed list state with the current buffer.
     *
     * @param context the snapshot context (not used here)
     * @throws Exception if a state operation fails
     */
    @Override
    public void snapshotState(FunctionSnapshotContext context) throws Exception {
        // Drop the previous snapshot's contents so the state mirrors the buffer exactly.
        checkpointedState.clear();
        for (Tuple2<String, Integer> element : bufferedElements) {
            checkpointedState.add(element);
        }
    }

    /**
     * Registers the {@code "buffered-elements"} list state with the operator state store and,
     * if the context reports restored state, copies the restored elements into the buffer.
     *
     * @param context the initialization context providing the operator state store
     * @throws Exception if the state cannot be registered or read
     */
    @Override
    public void initializeState(FunctionInitializationContext context) throws Exception {
        ListStateDescriptor<Tuple2<String, Integer>> descriptor =
            new ListStateDescriptor<>(
                "buffered-elements",
                TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {}));

        checkpointedState = context.getOperatorStateStore().getListState(descriptor);

        if (context.isRestored()) {
            for (Tuple2<String, Integer> element : checkpointedState.get()) {
                bufferedElements.add(element);
            }
        }
    }
}
flink-streaming-java_2.11-1.7.0-sources.jar!/org/apache/flink/streaming/api/checkpoint/CheckpointedFunction.java
/**
 * Callback interface for functions that take part in checkpointing. It declares one hook
 * that runs when a checkpoint snapshot is requested and one that runs when the parallel
 * function instance is created.
 */
@PublicEvolving
@SuppressWarnings("deprecation")
public interface CheckpointedFunction {
/**
 * Called when a snapshot for a checkpoint is requested. The function uses this hook to make
 * sure all of its state is exposed, either through the means obtained earlier from the
 * {@link FunctionInitializationContext} when the function was initialized, or through the
 * {@link FunctionSnapshotContext} passed in now.
 *
 * @param context the context for drawing a snapshot of the operator
 * @throws Exception if the implementation fails while exposing its state; how the caller
 *     reacts is not visible in this interface
 */
void snapshotState(FunctionSnapshotContext context) throws Exception;
/**
 * Called when the parallel function instance is created during distributed execution.
 * Functions typically set up their state-storing data structures in this method.
 *
 * @param context the context for initializing the operator
 * @throws Exception if the implementation fails while setting up its state; how the caller
 *     reacts is not visible in this interface
 */
void initializeState(FunctionInitializationContext context) throws Exception;
}
flink-runtime_2.11-1.7.0-sources.jar!/org/apache/flink/runtime/state/FunctionSnapshotContext.java
/**
 * Context in which user functions that use managed state (i.e. state that is managed by
 * state backends) can participate in a snapshot. Because the snapshots of the backends
 * themselves are taken by the system, this interface mainly provides meta information
 * about the checkpoint.
 *
 * <p>This interface declares no members of its own; everything it offers is inherited
 * from {@link ManagedSnapshotContext}.
 */
@PublicEvolving
public interface FunctionSnapshotContext extends ManagedSnapshotContext {
}
flink-runtime_2.11-1.7.0-sources.jar!/org/apache/flink/runtime/state/ManagedSnapshotContext.java
/**
 * Context in which operators that use managed state (i.e. state that is managed by state
 * backends) can perform a snapshot. Because the snapshots of the backends themselves are
 * taken by the system, this interface mainly provides meta information about the checkpoint.
 */
@PublicEvolving
public interface ManagedSnapshotContext {
/**
 * Returns the ID of the checkpoint for which the snapshot is taken.
 *
 * <p>The checkpoint ID is guaranteed to be strictly monotonously increasing across checkpoints.
 * For two completed checkpoints <i>A</i> and <i>B</i>, {@code ID_B > ID_A} means that checkpoint
 * <i>B</i> subsumes checkpoint <i>A</i>, i.e., checkpoint <i>B</i> contains a later state
 * than checkpoint <i>A</i>.
 *
 * @return the ID of the checkpoint being taken
 */
long getCheckpointId();
/**
 * Returns the timestamp (wall clock time) at which the master node triggered the checkpoint
 * for which the state snapshot is taken.
 *
 * @return the trigger timestamp of the checkpoint; the unit is not stated in this
 *     interface (presumably epoch milliseconds; confirm against the implementation)
 */
long getCheckpointTimestamp();
}
flink-runtime_2.11-1.7.0-sources.jar!/org/apache/flink/runtime/state/FunctionInitializationContext.java
/**
 * Context in which user functions can initialize by registering to managed state (i.e. state
 * that is managed by state backends).
 *
 * <p>Operator state is available to all functions, while keyed state is only available for
 * functions after keyBy.
 *
 * <p>For the purpose of initialization, the context signals whether the state is empty or
 * was restored from a previous execution.
 *
 * <p>This interface declares no members of its own; everything it offers is inherited
 * from {@link ManagedInitializationContext}.
 */
@PublicEvolving
public interface FunctionInitializationContext extends ManagedInitializationContext {
}
flink-runtime_2.11-1.7.0-sources.jar!/org/apache/flink/runtime/state/ManagedInitializationContext.java
/**
 * Context in which operators can initialize by registering to managed state (i.e. state that
 * is managed by state backends).
 *
 * <p>Operator state is available to all operators, while keyed state is only available for
 * operators after keyBy.
 *
 * <p>For the purpose of initialization, the context signals whether the state is empty
 * (new operator) or was restored from a previous execution of this operator.
 */
public interface ManagedInitializationContext {
/**
 * Returns true if state was restored from the snapshot of a previous execution. This always
 * returns false for stateless tasks.
 *
 * @return {@code true} if state was restored from a previous execution's snapshot,
 *     {@code false} otherwise
 */
boolean isRestored();
/**
 * Returns an interface that allows for registering operator state with the backend.
 *
 * @return the store used to register operator state
 */
OperatorStateStore getOperatorStateStore();
/**
 * Returns an interface that allows for registering keyed state with the backend.
 *
 * @return the store used to register keyed state
 */
KeyedStateStore getKeyedStateStore();
}
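- flink有两种基本的state,分别是Keyed State以及Operator State(non-keyed state);其中Keyed State只能在KeyedStream上的functions及operators上使用;每个operator state会跟parallel operator中的一个实例绑定;Operator State支持parallelism变更时进行redistributing
- CheckpointedFunction定义了snapshotState、initializeState两个方法;snapshotState在checkpoint的snapshot被请求时调用;initializeState在每次parallel function实例创建的时候(第一次初始化或者从前一次checkpoint recover的时候)被调用,该方法不仅可以用来初始化state,还可以用于处理state recovery的逻辑
- operator state的redistribution模式有两种,分别是Even-split redistribution(在restore/redistribution的时候每个operator仅仅得到整个state的sublist)及Union redistribution(在restore/redistribution的时候每个operator得到整个state的完整list)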