mirror of https://github.com/yshui/picom.git
core: factor out code for estimating the rendering time budget
Add a render_statistics type to encapsulate all the statistics done on rendering times. And use that to estimate the time budget for rendering and frame pacing. Tweak the rolling window utilities a bit so we can reuse one rolling window for multiple statistics. Signed-off-by: Yuxuan Shui <yshuiv7@gmail.com>
This commit is contained in:
parent
336cb0917a
commit
8e1f3c92f5
|
@ -176,6 +176,20 @@ void paint_all_new(session_t *ps, struct managed_win *t) {
|
|||
region_t reg_shadow_clip;
|
||||
pixman_region32_init(®_shadow_clip);
|
||||
|
||||
struct timespec now;
|
||||
clock_gettime(CLOCK_MONOTONIC, &now);
|
||||
auto now_us = (uint64_t)(now.tv_sec * 1000000 + now.tv_nsec / 1000);
|
||||
if (ps->next_render > 0) {
|
||||
log_trace("Render schedule deviation: %ld us (%s) %ld %ld",
|
||||
labs((int64_t)now_us - (int64_t)ps->next_render),
|
||||
now_us < ps->next_render ? "early" : "late", now_us,
|
||||
ps->next_render);
|
||||
ps->last_schedule_delay = 0;
|
||||
if (now_us > ps->next_render) {
|
||||
ps->last_schedule_delay = now_us - ps->next_render;
|
||||
}
|
||||
}
|
||||
|
||||
if (ps->backend_data->ops->prepare) {
|
||||
ps->backend_data->ops->prepare(ps->backend_data, ®_paint);
|
||||
}
|
||||
|
|
12
src/common.h
12
src/common.h
|
@ -58,6 +58,7 @@
|
|||
#include "list.h"
|
||||
#include "region.h"
|
||||
#include "render.h"
|
||||
#include "statistics.h"
|
||||
#include "types.h"
|
||||
#include "utils.h"
|
||||
#include "win_defs.h"
|
||||
|
@ -249,13 +250,16 @@ typedef struct session {
|
|||
/// When the currently rendered frame will be displayed.
|
||||
/// 0 means there is no pending frame.
|
||||
uint64_t target_msc;
|
||||
/// When did we render our last frame.
|
||||
uint64_t last_render;
|
||||
/// The delay between when the last frame was scheduled to be rendered, and when
|
||||
/// the render actually started.
|
||||
uint64_t last_schedule_delay;
|
||||
/// When do we want our next frame to start rendering.
|
||||
uint64_t next_render;
|
||||
/// Whether we can perform frame pacing.
|
||||
bool frame_pacing;
|
||||
|
||||
struct rolling_avg *frame_time;
|
||||
struct rolling_max *render_stats;
|
||||
/// Render statistics
|
||||
struct render_statistics render_stats;
|
||||
|
||||
// === Operation related ===
|
||||
/// Flags related to the root window
|
||||
|
|
|
@ -9,7 +9,7 @@ base_deps = [
|
|||
|
||||
srcs = [ files('picom.c', 'win.c', 'c2.c', 'x.c', 'config.c', 'vsync.c', 'utils.c',
|
||||
'diagnostic.c', 'string_utils.c', 'render.c', 'kernel.c', 'log.c',
|
||||
'options.c', 'event.c', 'cache.c', 'atom.c', 'file_watch.c') ]
|
||||
'options.c', 'event.c', 'cache.c', 'atom.c', 'file_watch.c', 'statistics.c') ]
|
||||
picom_inc = include_directories('.')
|
||||
|
||||
cflags = []
|
||||
|
|
119
src/picom.c
119
src/picom.c
|
@ -16,6 +16,7 @@
|
|||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
@ -43,7 +44,6 @@
|
|||
#endif
|
||||
#include "backend/backend.h"
|
||||
#include "c2.h"
|
||||
#include "config.h"
|
||||
#include "diagnostic.h"
|
||||
#include "log.h"
|
||||
#include "region.h"
|
||||
|
@ -60,6 +60,7 @@
|
|||
#include "file_watch.h"
|
||||
#include "list.h"
|
||||
#include "options.h"
|
||||
#include "statistics.h"
|
||||
#include "uthash_extra.h"
|
||||
|
||||
/// Get session_t pointer from a pointer to a member of session_t
|
||||
|
@ -177,8 +178,17 @@ static inline struct managed_win *find_win_all(session_t *ps, const xcb_window_t
|
|||
/// 4. otherwise, we schedule a render for that target frame. we use past statistics about
|
||||
/// how long our renders took to figure out when to start rendering. we start rendering
|
||||
/// at the latest point of time possible to still hit the target frame.
|
||||
void schedule_render(session_t *ps) {
|
||||
///
|
||||
/// The `triggered_by_timer` parameter is used to indicate whether this function is
|
||||
/// triggered by a steady timer, i.e. we are rendering for each vblank. The other case is
|
||||
/// when we stop rendering for a while because there is no changes on screen, then
|
||||
/// something changed and schedule_render is triggered by a DamageNotify. The idea is that
|
||||
/// when the schedule is triggered by a steady timer, schedule_render will be called at a
|
||||
/// predictable offset into each vblank.
|
||||
|
||||
void schedule_render(session_t *ps, bool triggered_by_vblank) {
|
||||
double delay_s = 0;
|
||||
ps->next_render = 0;
|
||||
if (!ps->frame_pacing || !ps->redirected) {
|
||||
// Not doing frame pacing, schedule a render immediately, if not already
|
||||
// scheduled.
|
||||
|
@ -205,59 +215,77 @@ void schedule_render(session_t *ps) {
|
|||
// We missed our target, push it back one frame
|
||||
ps->target_msc = ps->last_msc + 1;
|
||||
}
|
||||
log_trace("Still rendering for target frame %ld, not scheduling another "
|
||||
"render",
|
||||
ps->target_msc);
|
||||
return;
|
||||
}
|
||||
if (ps->target_msc > ps->last_msc) {
|
||||
// Render for the target frame is completed, and has yet to be displayed.
|
||||
// Render for the target frame is completed, but is yet to be displayed.
|
||||
// Don't schedule another render.
|
||||
log_trace("Target frame %ld is in the future, and we have already "
|
||||
"rendered, last msc: %d",
|
||||
ps->target_msc, (int)ps->last_msc);
|
||||
return;
|
||||
}
|
||||
|
||||
struct timespec now;
|
||||
clock_gettime(CLOCK_MONOTONIC, &now);
|
||||
auto now_us = (uint64_t)now.tv_sec * 1000000 + (uint64_t)now.tv_nsec / 1000;
|
||||
if (triggered_by_vblank) {
|
||||
log_trace("vblank schedule delay: %ld us", now_us - ps->last_msc_instant);
|
||||
}
|
||||
|
||||
int render_time_us =
|
||||
(int)(render_time.tv_sec * 1000000L + render_time.tv_nsec / 1000L);
|
||||
if (ps->target_msc == ps->last_msc) {
|
||||
// The frame has just been displayed, record its render time;
|
||||
log_trace("Last render call took: %d us, rolling_max: %d us",
|
||||
render_time_us, rolling_max_get_max(ps->render_stats));
|
||||
rolling_max_push(ps->render_stats, render_time_us);
|
||||
log_trace("Last render call took: %d (gpu) + %d (cpu) us, "
|
||||
"last_msc: %" PRIu64,
|
||||
render_time_us, (int)ps->last_schedule_delay, ps->last_msc);
|
||||
render_statistics_add_render_time_sample(
|
||||
&ps->render_stats, render_time_us + (int)ps->last_schedule_delay);
|
||||
ps->target_msc = 0;
|
||||
ps->last_schedule_delay = 0;
|
||||
}
|
||||
|
||||
// TODO(yshui): why 1500?
|
||||
const int SLACK = 1500;
|
||||
int render_time_estimate = rolling_max_get_max(ps->render_stats);
|
||||
auto frame_time = (uint64_t)rolling_avg_get_avg(ps->frame_time);
|
||||
if (render_time_estimate < 0 || frame_time == 0) {
|
||||
// We don't have render time, and/or frame time estimates, maybe there's
|
||||
unsigned int divisor = 0;
|
||||
auto render_budget = render_statistics_get_budget(&ps->render_stats, &divisor);
|
||||
auto frame_time = render_statistics_get_vblank_time(&ps->render_stats);
|
||||
if (frame_time == 0) {
|
||||
// We don't have enough data for render time estimates, maybe there's
|
||||
// no frame rendered yet, or the backend doesn't support render timing
|
||||
// information, schedule render immediately.
|
||||
ps->target_msc = ps->last_msc + 1;
|
||||
goto schedule;
|
||||
}
|
||||
render_time_estimate += SLACK;
|
||||
|
||||
auto minimal_target_us = now_us + (uint64_t)render_time_estimate;
|
||||
auto frame_delay = (uint64_t)ceil(
|
||||
(double)(minimal_target_us - ps->last_msc_instant) / (double)frame_time);
|
||||
auto deadline = ps->last_msc_instant + frame_delay * frame_time;
|
||||
|
||||
ps->target_msc = ps->last_msc + frame_delay;
|
||||
auto delay = deadline - (uint64_t)render_time_estimate - now_us;
|
||||
delay_s = (double)delay / 1000000.0;
|
||||
if (delay_s > 1) {
|
||||
log_warn("Delay too long: %f s, render_time: %d us, frame_time: %" PRIu64
|
||||
" us, now_us: %" PRIu64 " us, next_msc: %" PRIu64 " us",
|
||||
delay_s, render_time_estimate, frame_time, now_us, deadline);
|
||||
const auto deadline = ps->last_msc_instant + (unsigned long)divisor * frame_time;
|
||||
unsigned int available = 0;
|
||||
if (deadline > now_us) {
|
||||
available = (unsigned int)(deadline - now_us);
|
||||
}
|
||||
|
||||
log_trace("Delay: %lu us, last_msc: %" PRIu64 ", render_time: %d, frame_time: "
|
||||
"%" PRIu64 ", now_us: %" PRIu64 ", next_msc: %" PRIu64,
|
||||
delay, ps->last_msc_instant, render_time_estimate, frame_time, now_us,
|
||||
deadline);
|
||||
ps->target_msc = ps->last_msc + divisor;
|
||||
if (available > render_budget) {
|
||||
delay_s = (double)(available - render_budget) / 1000000.0;
|
||||
ps->next_render = deadline - render_budget;
|
||||
} else {
|
||||
delay_s = 0;
|
||||
ps->next_render = now_us;
|
||||
}
|
||||
if (delay_s > 1) {
|
||||
log_warn("Delay too long: %f s, render_budget: %d us, frame_time: "
|
||||
"%" PRIu32 " us, now_us: %" PRIu64 " us, next_msc: %" PRIu64 " u"
|
||||
"s",
|
||||
delay_s, render_budget, frame_time, now_us, deadline);
|
||||
}
|
||||
|
||||
log_trace("Delay: %.6lf s, last_msc: %" PRIu64 ", render_budget: %d, frame_time: "
|
||||
"%" PRIu32 ", now_us: %" PRIu64 ", next_msc: %" PRIu64 ", "
|
||||
"target_msc: %" PRIu64 ", divisor: %d",
|
||||
delay_s, ps->last_msc_instant, render_budget, frame_time, now_us,
|
||||
deadline, ps->target_msc, divisor);
|
||||
|
||||
schedule:
|
||||
assert(!ev_is_active(&ps->draw_timer));
|
||||
|
@ -282,7 +310,7 @@ void queue_redraw(session_t *ps) {
|
|||
// If frame pacing is not enabled, pretend this is false.
|
||||
// If --benchmark is used, redraw is always queued
|
||||
if (!ps->redraw_needed && !ps->o.benchmark) {
|
||||
schedule_render(ps);
|
||||
schedule_render(ps, false);
|
||||
}
|
||||
ps->redraw_needed = true;
|
||||
}
|
||||
|
@ -758,7 +786,6 @@ static void configure_root(session_t *ps) {
|
|||
}
|
||||
force_repaint(ps);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
static void handle_root_flags(session_t *ps) {
|
||||
|
@ -1439,10 +1466,9 @@ static bool redirect_start(session_t *ps) {
|
|||
// states.
|
||||
ps->last_msc_instant = 0;
|
||||
ps->last_msc = 0;
|
||||
ps->last_render = 0;
|
||||
ps->last_schedule_delay = 0;
|
||||
ps->target_msc = 0;
|
||||
rolling_avg_reset(ps->frame_time);
|
||||
rolling_max_reset(ps->render_stats);
|
||||
render_statistics_reset(&ps->render_stats);
|
||||
} else if (ps->frame_pacing) {
|
||||
log_error("Present extension is not supported, frame pacing disabled.");
|
||||
ps->frame_pacing = false;
|
||||
|
@ -1543,11 +1569,12 @@ handle_present_complete_notify(session_t *ps, xcb_present_complete_notify_event_
|
|||
if (ps->last_msc_instant != 0) {
|
||||
auto frame_count = cne->msc - ps->last_msc;
|
||||
int frame_time = (int)((cne->ust - ps->last_msc_instant) / frame_count);
|
||||
rolling_avg_push(ps->frame_time, frame_time);
|
||||
log_trace("Frame count %lu, frame time: %d us, rolling average: %lf us, "
|
||||
render_statistics_add_vblank_time_sample(&ps->render_stats, frame_time);
|
||||
log_trace("Frame count %lu, frame time: %d us, rolling average: %u us, "
|
||||
"msc: %" PRIu64 ", offset: %d us",
|
||||
frame_count, frame_time, rolling_avg_get_avg(ps->frame_time),
|
||||
cne->ust, (int)drift);
|
||||
frame_count, frame_time,
|
||||
render_statistics_get_vblank_time(&ps->render_stats), cne->ust,
|
||||
(int)drift);
|
||||
} else if (drift > 1000000 && ps->frame_pacing) {
|
||||
// This is the first MSC event we receive, let's check if the timestamps
|
||||
// align with the monotonic clock. If not, disable frame pacing because we
|
||||
|
@ -1560,7 +1587,7 @@ handle_present_complete_notify(session_t *ps, xcb_present_complete_notify_event_
|
|||
ps->last_msc_instant = cne->ust;
|
||||
ps->last_msc = cne->msc;
|
||||
if (ps->redraw_needed) {
|
||||
schedule_render(ps);
|
||||
schedule_render(ps, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1651,6 +1678,8 @@ static void tmout_unredir_callback(EV_P attr_unused, ev_timer *w, int revents at
|
|||
}
|
||||
|
||||
static void fade_timer_callback(EV_P attr_unused, ev_timer *w, int revents attr_unused) {
|
||||
// TODO(yshui): do we still need the fade timer? we queue redraw automatically in
|
||||
// draw_callback_impl if animation is running.
|
||||
session_t *ps = session_ptr(w, fade_timer);
|
||||
queue_redraw(ps);
|
||||
}
|
||||
|
@ -1778,12 +1807,6 @@ static void draw_callback_impl(EV_P_ session_t *ps, int revents attr_unused) {
|
|||
}
|
||||
log_trace("Render end");
|
||||
|
||||
struct timespec now;
|
||||
clock_gettime(CLOCK_MONOTONIC, &now);
|
||||
uint64_t now_us =
|
||||
(uint64_t)now.tv_sec * 1000000ULL + (uint64_t)now.tv_nsec / 1000;
|
||||
ps->last_render = now_us;
|
||||
|
||||
ps->first_frame = false;
|
||||
paint++;
|
||||
if (ps->o.benchmark && paint >= ps->o.benchmark) {
|
||||
|
@ -2004,8 +2027,7 @@ static session_t *session_init(int argc, char **argv, Display *dpy,
|
|||
pixman_region32_init(&ps->screen_reg);
|
||||
|
||||
// TODO(yshui) investigate what's the best window size
|
||||
ps->render_stats = rolling_max_new(128);
|
||||
ps->frame_time = rolling_avg_new(16);
|
||||
render_statistics_init(&ps->render_stats, 128);
|
||||
|
||||
ps->pending_reply_tail = &ps->pending_reply_head;
|
||||
|
||||
|
@ -2642,8 +2664,7 @@ static void session_destroy(session_t *ps) {
|
|||
free(ps->o.glx_fshader_win_str);
|
||||
x_free_randr_info(ps);
|
||||
|
||||
rolling_avg_destroy(ps->frame_time);
|
||||
rolling_max_destroy(ps->render_stats);
|
||||
render_statistics_destroy(&ps->render_stats);
|
||||
|
||||
// Release custom window shaders
|
||||
free(ps->o.window_shader_fg);
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
//! Rendering statistics
|
||||
//!
|
||||
//! Tracks how long it takes to render a frame, for measuring performance, and for pacing
|
||||
//! the frames.
|
||||
|
||||
#include "statistics.h"
|
||||
#include "log.h"
|
||||
#include "utils.h"
|
||||
|
||||
/// Initialize render statistics. `window_size` is the number of recent render
/// time samples kept for the percentile estimate.
void render_statistics_init(struct render_statistics *rs, int window_size) {
	*rs = (struct render_statistics){0};

	rolling_window_init(&rs->render_times, window_size);
	// Track the 98th percentile of render times, allowing the tracked rank
	// to drift by ±1% of the window before a re-estimate is forced.
	rolling_quantile_init_with_tolerance(&rs->render_time_quantile, window_size,
	                                     /* q */ 0.98, /* tolerance */ 0.01);
}
|
||||
|
||||
/// Record one observed interval between vblanks (in microseconds).
///
/// Maintains a running mean/variance of the vblank interval. An outlier sample
/// (more than 3 standard deviations from the current estimate) resets the
/// statistics, since it may indicate a refresh rate change. Otherwise, once
/// more than 20 samples AND roughly 10 seconds worth of samples have been
/// collected, the estimate is considered stable and further samples are
/// ignored (until an outlier forces a reset).
void render_statistics_add_vblank_time_sample(struct render_statistics *rs, int time_us) {
	auto sample_sd = sqrt(cumulative_mean_and_var_get_var(&rs->vblank_time_us));
	auto current_estimate = render_statistics_get_vblank_time(rs);
	if (current_estimate != 0 && fabs((double)time_us - current_estimate) > sample_sd * 3) {
		// Deviated from the mean by more than 3 sigma (p < 0.003)
		log_debug("vblank time outlier: %d %f %f", time_us, rs->vblank_time_us.mean,
		          cumulative_mean_and_var_get_var(&rs->vblank_time_us));
		// An outlier sample, this could mean things like refresh rate changes, so
		// we reset the statistics. This could also be benign, but we like to be
		// cautious.
		cumulative_mean_and_var_init(&rs->vblank_time_us);
	}

	if (rs->vblank_time_us.mean != 0) {
		auto nframes_in_10_seconds =
		    (unsigned int)(10. * 1000000. / rs->vblank_time_us.mean);
		if (rs->vblank_time_us.n > 20 && rs->vblank_time_us.n > nframes_in_10_seconds) {
			// We collected 10 seconds worth of samples, we assume the
			// estimated refresh rate is stable. We will still reset the
			// statistics if we get an outlier sample though, see above.
			return;
		}
	}
	cumulative_mean_and_var_update(&rs->vblank_time_us, time_us);
}
|
||||
|
||||
void render_statistics_add_render_time_sample(struct render_statistics *rs, int time_us) {
|
||||
int oldest;
|
||||
if (rolling_window_push_back(&rs->render_times, time_us, &oldest)) {
|
||||
rolling_quantile_pop_front(&rs->render_time_quantile, oldest);
|
||||
}
|
||||
|
||||
rolling_quantile_push_back(&rs->render_time_quantile, time_us);
|
||||
}
|
||||
|
||||
/// How much time budget we should give to the backend for rendering, in microseconds.
///
/// A `divisor` is also returned, indicating the target framerate. The divisor is
/// the number of vblanks we should wait between each frame. A divisor of 1 means
/// full framerate, 2 means half framerate, etc.
unsigned int
render_statistics_get_budget(struct render_statistics *rs, unsigned int *divisor) {
	if (rs->render_times.nelem < rs->render_times.window_size) {
		// No valid render time estimates yet. Assume maximum budget.
		*divisor = 1;
		return UINT_MAX;
	}

	// N-th percentile of render times, see render_statistics_init for N.
	auto render_time_percentile =
	    rolling_quantile_estimate(&rs->render_time_quantile, &rs->render_times);
	auto vblank_time_us = render_statistics_get_vblank_time(rs);
	if (vblank_time_us == 0) {
		// We don't have a good estimate of the vblank time yet, so we
		// assume we can finish in one vblank.
		*divisor = 1;
	} else {
		// Number of whole vblank intervals the near-worst-case render
		// spans, plus one: how many vblanks to leave between frames.
		*divisor =
		    (unsigned int)(render_time_percentile / rs->vblank_time_us.mean + 1);
	}
	return (unsigned int)render_time_percentile;
}
|
||||
|
||||
/// Return the estimated time between vblanks in microseconds, or 0 if there is
/// no reliable estimate yet (20 samples or fewer, or a mean below 100 us).
unsigned int render_statistics_get_vblank_time(struct render_statistics *rs) {
	if (rs->vblank_time_us.n <= 20 || rs->vblank_time_us.mean < 100) {
		// Not enough samples yet, or the vblank time is too short to be
		// meaningful. Return 0 to signal "unknown".
		return 0;
	}
	return (unsigned int)rs->vblank_time_us.mean;
}
|
||||
|
||||
void render_statistics_reset(struct render_statistics *rs) {
|
||||
rolling_window_reset(&rs->render_times);
|
||||
rolling_quantile_reset(&rs->render_time_quantile);
|
||||
rs->vblank_time_us = (struct cumulative_mean_and_var){0};
|
||||
}
|
||||
|
||||
/// Release all resources held by the render statistics. The struct itself is
/// owned by the caller.
void render_statistics_destroy(struct render_statistics *rs) {
	render_statistics_reset(rs);
	rolling_window_destroy(&rs->render_times);
	rolling_quantile_destroy(&rs->render_time_quantile);
}
|
|
@ -0,0 +1,33 @@
|
|||
#pragma once

#include "utils.h"

// Number of frame pacing tiers.
// NOTE(review): not referenced anywhere in this header; confirm its users
// before changing.
#define NTIERS (3)

/// Statistics collected about rendering, used to estimate the rendering time
/// budget and to pace frames.
struct render_statistics {
	/// Rolling window of recent rendering times (in us).
	struct rolling_window render_times;
	/// Estimates a high percentile of the rendering times (q = 0.98, see
	/// render_statistics_init — the original "95-th" note did not match).
	struct rolling_quantile render_time_quantile;
	/// Running mean and variance of the time between vblanks, in us.
	struct cumulative_mean_and_var vblank_time_us;
};

void render_statistics_init(struct render_statistics *rs, int window_size);
void render_statistics_reset(struct render_statistics *rs);
void render_statistics_destroy(struct render_statistics *rs);

/// Record one observed vblank interval (in us).
void render_statistics_add_vblank_time_sample(struct render_statistics *rs, int time_us);
/// Record one render time sample (in us).
void render_statistics_add_render_time_sample(struct render_statistics *rs, int time_us);

/// How much time budget we should give to the backend for rendering, in microseconds.
///
/// A `divisor` is also returned, indicating the target framerate. The divisor is
/// the number of vblanks we should wait between each frame. A divisor of 1 means
/// full framerate, 2 means half framerate, etc.
unsigned int
render_statistics_get_budget(struct render_statistics *rs, unsigned int *divisor);

/// The estimated time between vblanks in us, or 0 if unknown.
unsigned int render_statistics_get_vblank_time(struct render_statistics *rs);
|
219
src/utils.c
219
src/utils.c
|
@ -48,7 +48,43 @@ int next_power_of_two(int n) {
|
|||
return n;
|
||||
}
|
||||
|
||||
/// Free the backing storage of a rolling window. The element pointer is nulled
/// afterwards, so calling this twice is harmless (free(NULL) is a no-op).
void rolling_window_destroy(struct rolling_window *rw) {
	free(rw->elem);
	rw->elem = NULL;
}
|
||||
|
||||
void rolling_window_reset(struct rolling_window *rw) {
|
||||
rw->nelem = 0;
|
||||
rw->elem_head = 0;
|
||||
}
|
||||
|
||||
/// Initialize a rolling window that can hold up to `size` ints.
// NOTE(review): ccalloc is assumed to handle allocation failure itself (it is
// used unchecked throughout this file) — confirm against utils.h.
void rolling_window_init(struct rolling_window *rw, int size) {
	rw->elem = ccalloc(size, int);
	rw->window_size = size;
	rolling_window_reset(rw);
}
|
||||
|
||||
/// Remove and return the oldest element in the window.
/// Precondition: the window must not be empty (asserted).
int rolling_window_pop_front(struct rolling_window *rw) {
	assert(rw->nelem > 0);
	auto ret = rw->elem[rw->elem_head];
	// Advance the head of the circular buffer.
	rw->elem_head = (rw->elem_head + 1) % rw->window_size;
	rw->nelem--;
	return ret;
}
|
||||
|
||||
bool rolling_window_push_back(struct rolling_window *rw, int val, int *front) {
|
||||
bool full = rw->nelem == rw->window_size;
|
||||
if (full) {
|
||||
*front = rolling_window_pop_front(rw);
|
||||
}
|
||||
rw->elem[(rw->elem_head + rw->nelem) % rw->window_size] = val;
|
||||
rw->nelem++;
|
||||
return full;
|
||||
}
|
||||
|
||||
/// Track the maximum member of a FIFO queue of integers. Integers are pushed to the back
|
||||
/// and popped from the front, the maximum of the current members in the queue is
|
||||
/// tracked.
|
||||
struct rolling_max {
|
||||
/// A priority queue holding the indices of the maximum element candidates.
|
||||
/// The head of the queue is the index of the maximum element.
|
||||
|
@ -59,32 +95,26 @@ struct rolling_max {
|
|||
/// it's called the "original" indices.
|
||||
int *p;
|
||||
int p_head, np;
|
||||
|
||||
/// The elements
|
||||
int *elem;
|
||||
int elem_head, nelem;
|
||||
|
||||
int window_size;
|
||||
/// The maximum number of in flight elements.
|
||||
int capacity;
|
||||
};
|
||||
|
||||
/// Free a rolling_max and its internal priority queue. (The element values
/// themselves are maintained by the caller, see rolling_max_pop_front.)
void rolling_max_destroy(struct rolling_max *rm) {
	free(rm->p);
	free(rm);
}
|
||||
|
||||
struct rolling_max *rolling_max_new(int size) {
|
||||
struct rolling_max *rolling_max_new(int capacity) {
|
||||
auto rm = ccalloc(1, struct rolling_max);
|
||||
if (!rm) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rm->p = ccalloc(size, int);
|
||||
rm->elem = ccalloc(size, int);
|
||||
rm->window_size = size;
|
||||
if (!rm->p || !rm->elem) {
|
||||
rm->p = ccalloc(capacity, int);
|
||||
if (!rm->p) {
|
||||
goto err;
|
||||
}
|
||||
rm->capacity = capacity;
|
||||
|
||||
return rm;
|
||||
|
||||
|
@ -96,33 +126,21 @@ err:
|
|||
void rolling_max_reset(struct rolling_max *rm) {
|
||||
rm->p_head = 0;
|
||||
rm->np = 0;
|
||||
rm->nelem = 0;
|
||||
rm->elem_head = 0;
|
||||
}
|
||||
|
||||
void rolling_max_push(struct rolling_max *rm, int val) {
|
||||
#define IDX(n) ((n) % rm->window_size)
|
||||
if (rm->nelem == rm->window_size) {
|
||||
auto old_head = rm->elem_head;
|
||||
// Discard the oldest element.
|
||||
// rm->elem.pop_front();
|
||||
rm->nelem--;
|
||||
rm->elem_head = IDX(rm->elem_head + 1);
|
||||
|
||||
// Remove discarded element from the priority queue too.
|
||||
assert(rm->np);
|
||||
if (rm->p[rm->p_head] == old_head) {
|
||||
#define IDX(n) ((n) % rm->capacity)
/// Remove the oldest element in the window. The caller must maintain the list
/// of elements themselves, i.e. the behavior is undefined if `front` does not
/// match the oldest element.
void rolling_max_pop_front(struct rolling_max *rm, int front) {
	// Only the head of the priority queue can be the oldest element; drop
	// it if it is the one being removed.
	if (rm->p[rm->p_head] == front) {
		// rm->p.pop_front()
		rm->p_head = IDX(rm->p_head + 1);
		rm->np--;
	}
}
|
||||
|
||||
// Add the new element to the queue.
|
||||
// rm->elem.push_back(val)
|
||||
rm->elem[IDX(rm->elem_head + rm->nelem)] = val;
|
||||
rm->nelem++;
|
||||
|
||||
void rolling_max_push_back(struct rolling_max *rm, int val) {
|
||||
// Update the priority queue.
|
||||
// Remove all elements smaller than the new element from the queue. Because
|
||||
// the new element will become the maximum element before them, and since they
|
||||
|
@ -130,7 +148,7 @@ void rolling_max_push(struct rolling_max *rm, int val) {
|
|||
// element, so they will never become the maximum element.
|
||||
while (rm->np) {
|
||||
int p_tail = IDX(rm->p_head + rm->np - 1);
|
||||
if (rm->elem[rm->p[p_tail]] > val) {
|
||||
if (rm->p[p_tail] > val) {
|
||||
break;
|
||||
}
|
||||
// rm->p.pop_back()
|
||||
|
@ -138,108 +156,119 @@ void rolling_max_push(struct rolling_max *rm, int val) {
|
|||
}
|
||||
// Add the new element to the end of the queue.
|
||||
// rm->p.push_back(rm->start_index + rm->nelem - 1)
|
||||
rm->p[IDX(rm->p_head + rm->np)] = IDX(rm->elem_head + rm->nelem - 1);
|
||||
assert(rm->np < rm->capacity);
|
||||
rm->p[IDX(rm->p_head + rm->np)] = val;
|
||||
rm->np++;
|
||||
#undef IDX
|
||||
}
|
||||
#undef IDX
|
||||
|
||||
/// Return the maximum of the elements currently tracked, or INT_MIN if none.
int rolling_max_get_max(struct rolling_max *rm) {
	if (rm->np == 0) {
		return INT_MIN;
	}
	// The head of the priority queue holds the current maximum value.
	return rm->p[rm->p_head];
}
|
||||
|
||||
TEST_CASE(rolling_max_test) {
#define NELEM 15
	// Drive a rolling_max with a window of size 3, using a rolling_window
	// to track which element falls out of the window at each step.
	struct rolling_window queue;
	rolling_window_init(&queue, 3);
	auto rm = rolling_max_new(3);
	const int data[NELEM] = {1, 2, 3, 1, 4, 5, 2, 3, 6, 5, 4, 3, 2, 0, 0};
	// Expected maximum over the last (up to) 3 elements of `data`.
	const int expected_max[NELEM] = {1, 2, 3, 3, 4, 5, 5, 5, 6, 6, 6, 5, 4, 3, 2};
	int max[NELEM] = {0};
	for (int i = 0; i < NELEM; i++) {
		int front;
		bool full = rolling_window_push_back(&queue, data[i], &front);
		if (full) {
			// The window overflowed: retire the evicted element from
			// the max tracker as well.
			rolling_max_pop_front(rm, front);
		}
		rolling_max_push_back(rm, data[i]);
		max[i] = rolling_max_get_max(rm);
	}
	rolling_window_destroy(&queue);
	rolling_max_destroy(rm);
	TEST_TRUE(memcmp(max, expected_max, sizeof(max)) == 0);
#undef NELEM
}
|
||||
|
||||
/// A rolling average of a stream of integers.
|
||||
struct rolling_avg {
|
||||
/// The sum of the elements in the window.
|
||||
int64_t sum;
|
||||
// Find the k-th smallest element in an array (0-based rank). Partially sorts
// `elems` in place, partitioning around the first element of the current
// candidate range until the pivot lands at rank k.
int quickselect(int *elems, int nelem, int k) {
	int lo = 0, hi = nelem;        // candidates live in [lo, hi)
	while (lo != hi) {
		int pivot = elems[lo];
		int left = lo, right = hi;
		// Partition [lo, hi): values <= pivot end up to the left of
		// `left`, values >= pivot to the right.
		while (left < right) {
			while (left < right && elems[--right] >= pivot) {
			}
			elems[left] = elems[right];
			while (left < right && elems[++left] <= pivot) {
			}
			elems[right] = elems[left];
		}
		// `left` is now the pivot's final, sorted position.
		elems[left] = pivot;

		if (left == k) {
			break;
		}
		if (left < k) {
			// Target rank lies in the right part.
			lo = left + 1;
		} else {
			// Target rank lies in the left part.
			hi = left;
		}
	}
	return elems[k];
}
|
||||
|
||||
return rm;
|
||||
void rolling_quantile_init(struct rolling_quantile *rq, int capacity, int mink, int maxk) {
|
||||
*rq = (struct rolling_quantile){0};
|
||||
rq->tmp_buffer = malloc(sizeof(int) * (size_t)capacity);
|
||||
rq->capacity = capacity;
|
||||
rq->min_target_rank = mink;
|
||||
rq->max_target_rank = maxk;
|
||||
}
|
||||
|
||||
void rolling_avg_destroy(struct rolling_avg *rm) {
|
||||
free(rm->elem);
|
||||
free(rm);
|
||||
/// Initialize a rolling quantile estimator targeting quantile `target` (e.g.
/// 0.98) over a window of `window_size` samples. The tracked rank may drift
/// within ±`tolerance` (as a fraction of the window) before a re-estimate is
/// forced.
void rolling_quantile_init_with_tolerance(struct rolling_quantile *rq, int window_size,
                                          double target, double tolerance) {
	rolling_quantile_init(rq, window_size, (int)((target - tolerance) * window_size),
	                      (int)((target + tolerance) * window_size));
}
|
||||
|
||||
void rolling_avg_reset(struct rolling_avg *ra) {
|
||||
ra->sum = 0;
|
||||
ra->nelem = 0;
|
||||
ra->head = 0;
|
||||
void rolling_quantile_reset(struct rolling_quantile *rq) {
|
||||
rq->current_rank = 0;
|
||||
rq->estimate = 0;
|
||||
}
|
||||
|
||||
void rolling_avg_push(struct rolling_avg *ra, int val) {
|
||||
if (ra->nelem == ra->window_size) {
|
||||
// Discard the oldest element.
|
||||
// rm->elem.pop_front();
|
||||
ra->sum -= ra->elem[ra->head % ra->window_size];
|
||||
ra->nelem--;
|
||||
ra->head = (ra->head + 1) % ra->window_size;
|
||||
void rolling_quantile_destroy(struct rolling_quantile *rq) {
|
||||
free(rq->tmp_buffer);
|
||||
}
|
||||
|
||||
// Add the new element to the queue.
|
||||
// rm->elem.push_back(val)
|
||||
ra->elem[(ra->head + ra->nelem) % ra->window_size] = val;
|
||||
ra->sum += val;
|
||||
ra->nelem++;
|
||||
/// Return the current quantile estimate over `elements`.
///
/// The estimate is recomputed (via quickselect over a copy of the window) only
/// when the tracked rank of the previous estimate has drifted outside
/// [min_target_rank, max_target_rank]. Returns INT_MIN when a re-estimate is
/// needed but the window is not yet full.
int rolling_quantile_estimate(struct rolling_quantile *rq, struct rolling_window *elements) {
	if (rq->current_rank < rq->min_target_rank || rq->current_rank > rq->max_target_rank) {
		if (elements->nelem != elements->window_size) {
			return INT_MIN;
		}
		// Re-estimate the quantile.
		assert(elements->nelem <= rq->capacity);
		rolling_window_copy_to_array(elements, rq->tmp_buffer);
		// Aim for the middle of the allowed rank band.
		const int target_rank =
		    rq->min_target_rank + (rq->max_target_rank - rq->min_target_rank) / 2;
		rq->estimate = quickselect(rq->tmp_buffer, elements->nelem, target_rank);
		rq->current_rank = target_rank;
	}
	return rq->estimate;
}
|
||||
|
||||
double rolling_avg_get_avg(struct rolling_avg *ra) {
|
||||
if (ra->nelem == 0) {
|
||||
return 0;
|
||||
/// Account for a new element entering the window. An element at or below the
/// current estimate pushes the estimate's rank up by one.
void rolling_quantile_push_back(struct rolling_quantile *rq, int x) {
	if (x <= rq->estimate) {
		rq->current_rank++;
	}
}
|
||||
|
||||
TEST_CASE(rolling_avg_test) {
|
||||
#define NELEM 15
|
||||
auto rm = rolling_avg_new(3);
|
||||
const int data[NELEM] = {1, 2, 3, 1, 4, 5, 2, 3, 6, 5, 4, 3, 2, 0, 0};
|
||||
const double expected_avg[NELEM] = {
|
||||
1, 1.5, 2, 2, 8.0 / 3.0, 10.0 / 3.0, 11.0 / 3.0, 10.0 / 3.0,
|
||||
11.0 / 3.0, 14.0 / 3.0, 5, 4, 3, 5.0 / 3.0, 2.0 / 3.0};
|
||||
double avg[NELEM] = {0};
|
||||
for (int i = 0; i < NELEM; i++) {
|
||||
rolling_avg_push(rm, data[i]);
|
||||
avg[i] = rolling_avg_get_avg(rm);
|
||||
}
|
||||
for (int i = 0; i < NELEM; i++) {
|
||||
TEST_EQUAL(avg[i], expected_avg[i]);
|
||||
/// Account for an element leaving the window. An element at or below the
/// current estimate pulls the estimate's rank down by one.
void rolling_quantile_pop_front(struct rolling_quantile *rq, int x) {
	if (x <= rq->estimate) {
		rq->current_rank--;
	}
}
|
||||
|
||||
|
|
94
src/utils.h
94
src/utils.h
|
@ -17,6 +17,7 @@
|
|||
#include <time.h>
|
||||
|
||||
#include "compiler.h"
|
||||
#include "log.h"
|
||||
#include "types.h"
|
||||
|
||||
#define ARR_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
|
||||
|
@ -289,20 +290,97 @@ static inline void free_charpp(char **str) {
|
|||
///
|
||||
int next_power_of_two(int n);
|
||||
|
||||
/// A fixed-capacity FIFO window over a stream of ints, stored as a circular
/// buffer.
struct rolling_window {
	/// Backing storage, `window_size` ints.
	int *elem;
	/// Index of the oldest element, and the number of stored elements.
	int elem_head, nelem;
	int window_size;
};

void rolling_window_destroy(struct rolling_window *rw);
void rolling_window_reset(struct rolling_window *rw);
void rolling_window_init(struct rolling_window *rw, int size);
int rolling_window_pop_front(struct rolling_window *rw);
bool rolling_window_push_back(struct rolling_window *rw, int val, int *front);

/// Copy the contents of the rolling window to an array. The array is assumed to
/// have enough space to hold the contents of the rolling window.
static inline void attr_unused rolling_window_copy_to_array(struct rolling_window *rw,
                                                            int *arr) {
	// The length from head to the end of the array
	auto head_len = (size_t)(rw->window_size - rw->elem_head);
	if (head_len >= (size_t)rw->nelem) {
		// No wrap-around: one contiguous copy suffices.
		memcpy(arr, rw->elem + rw->elem_head, sizeof(int) * (size_t)rw->nelem);
	} else {
		// The window wraps: copy the head-to-end segment, then the part
		// that wrapped to the start of the buffer.
		auto tail_len = (size_t)((size_t)rw->nelem - head_len);
		memcpy(arr, rw->elem + rw->elem_head, sizeof(int) * head_len);
		memcpy(arr + head_len, rw->elem, sizeof(int) * tail_len);
	}
}
|
||||
|
||||
struct rolling_max;
|
||||
|
||||
struct rolling_max *rolling_max_new(int window_size);
|
||||
struct rolling_max *rolling_max_new(int capacity);
|
||||
void rolling_max_destroy(struct rolling_max *rm);
|
||||
void rolling_max_reset(struct rolling_max *rm);
|
||||
void rolling_max_push(struct rolling_max *rm, int val);
|
||||
void rolling_max_pop_front(struct rolling_max *rm, int front);
|
||||
void rolling_max_push_back(struct rolling_max *rm, int val);
|
||||
int rolling_max_get_max(struct rolling_max *rm);
|
||||
|
||||
struct rolling_avg;
|
||||
struct rolling_avg *rolling_avg_new(int window_size);
|
||||
void rolling_avg_destroy(struct rolling_avg *ra);
|
||||
void rolling_avg_reset(struct rolling_avg *ra);
|
||||
void rolling_avg_push(struct rolling_avg *ra, int val);
|
||||
double rolling_avg_get_avg(struct rolling_avg *ra);
|
||||
/// Estimate the mean and variance of random variable X using Welford's online
/// algorithm.
struct cumulative_mean_and_var {
	/// Running mean of all samples seen so far.
	double mean;
	/// Sum of squared deviations from the running mean (Welford's M2).
	double m2;
	/// Number of samples seen so far.
	unsigned int n;
};

/// Reset the estimator to the empty state.
static inline attr_unused void
cumulative_mean_and_var_init(struct cumulative_mean_and_var *cmv) {
	*cmv = (struct cumulative_mean_and_var){0};
}

/// Fold one sample `x` into the running mean/variance.
static inline attr_unused void
cumulative_mean_and_var_update(struct cumulative_mean_and_var *cmv, double x) {
	if (cmv->n == UINT_MAX) {
		// We have too many elements, let's keep the mean and variance.
		return;
	}
	cmv->n++;
	double delta = x - cmv->mean;
	cmv->mean += delta / (double)cmv->n;
	// Welford's recurrence: uses the pre- and post-update deviations.
	cmv->m2 += delta * (x - cmv->mean);
}

/// Sample variance (Bessel-corrected, divides by n - 1), or 0 with fewer than
/// two samples.
static inline attr_unused double
cumulative_mean_and_var_get_var(struct cumulative_mean_and_var *cmv) {
	if (cmv->n < 2) {
		return 0;
	}
	return cmv->m2 / (double)(cmv->n - 1);
}
|
||||
|
||||
// Find the k-th smallest element in an array.
|
||||
int quickselect(int *elems, int nelem, int k);
|
||||
|
||||
/// A naive quantile estimator.
///
/// Estimates the N-th percentile of a random variable X in a sliding window.
struct rolling_quantile {
	/// Rank of `estimate` within the current window, maintained
	/// incrementally as elements enter and leave.
	int current_rank;
	/// Bounds the rank may drift to before a re-estimate is triggered.
	int min_target_rank, max_target_rank;
	/// The current quantile estimate.
	int estimate;
	/// Maximum number of elements tracked at once.
	int capacity;
	/// Scratch buffer (`capacity` ints) used during re-estimation.
	int *tmp_buffer;
};
|
||||
|
||||
void rolling_quantile_init(struct rolling_quantile *rq, int capacity, int mink, int maxk);
|
||||
void rolling_quantile_init_with_tolerance(struct rolling_quantile *rq, int window_size,
|
||||
double target, double tolerance);
|
||||
void rolling_quantile_reset(struct rolling_quantile *rq);
|
||||
void rolling_quantile_destroy(struct rolling_quantile *rq);
|
||||
int rolling_quantile_estimate(struct rolling_quantile *rq, struct rolling_window *elements);
|
||||
void rolling_quantile_push_back(struct rolling_quantile *rq, int x);
|
||||
void rolling_quantile_pop_front(struct rolling_quantile *rq, int x);
|
||||
|
||||
// Some versions of the Android libc do not have timespec_get(), use
|
||||
// clock_gettime() instead.
|
||||
|
|
Loading…
Reference in New Issue