diff --git a/src/backend/backend.c b/src/backend/backend.c index 3543ee12..bacf29d2 100644 --- a/src/backend/backend.c +++ b/src/backend/backend.c @@ -176,6 +176,20 @@ void paint_all_new(session_t *ps, struct managed_win *t) { region_t reg_shadow_clip; pixman_region32_init(®_shadow_clip); + struct timespec now; + clock_gettime(CLOCK_MONOTONIC, &now); + auto now_us = (uint64_t)(now.tv_sec * 1000000 + now.tv_nsec / 1000); + if (ps->next_render > 0) { + log_trace("Render schedule deviation: %ld us (%s) %ld %ld", + labs((int64_t)now_us - (int64_t)ps->next_render), + now_us < ps->next_render ? "early" : "late", now_us, + ps->next_render); + ps->last_schedule_delay = 0; + if (now_us > ps->next_render) { + ps->last_schedule_delay = now_us - ps->next_render; + } + } + if (ps->backend_data->ops->prepare) { ps->backend_data->ops->prepare(ps->backend_data, ®_paint); } diff --git a/src/common.h b/src/common.h index b76863df..65d0e7b6 100644 --- a/src/common.h +++ b/src/common.h @@ -58,6 +58,7 @@ #include "list.h" #include "region.h" #include "render.h" +#include "statistics.h" #include "types.h" #include "utils.h" #include "win_defs.h" @@ -249,13 +250,16 @@ typedef struct session { /// When the currently rendered frame will be displayed. /// 0 means there is no pending frame. uint64_t target_msc; - /// When did we render our last frame. - uint64_t last_render; + /// The delay between when the last frame was scheduled to be rendered, and when + /// the render actually started. + uint64_t last_schedule_delay; + /// When do we want our next frame to start rendering. + uint64_t next_render; /// Whether we can perform frame pacing. 
bool frame_pacing; - struct rolling_avg *frame_time; - struct rolling_max *render_stats; + /// Render statistics + struct render_statistics render_stats; // === Operation related === /// Flags related to the root window diff --git a/src/meson.build b/src/meson.build index 0cd87c89..074683b2 100644 --- a/src/meson.build +++ b/src/meson.build @@ -9,7 +9,7 @@ base_deps = [ srcs = [ files('picom.c', 'win.c', 'c2.c', 'x.c', 'config.c', 'vsync.c', 'utils.c', 'diagnostic.c', 'string_utils.c', 'render.c', 'kernel.c', 'log.c', - 'options.c', 'event.c', 'cache.c', 'atom.c', 'file_watch.c') ] + 'options.c', 'event.c', 'cache.c', 'atom.c', 'file_watch.c', 'statistics.c') ] picom_inc = include_directories('.') cflags = [] diff --git a/src/picom.c b/src/picom.c index 8f217b14..8bb22d7d 100644 --- a/src/picom.c +++ b/src/picom.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -43,7 +44,6 @@ #endif #include "backend/backend.h" #include "c2.h" -#include "config.h" #include "diagnostic.h" #include "log.h" #include "region.h" @@ -60,6 +60,7 @@ #include "file_watch.h" #include "list.h" #include "options.h" +#include "statistics.h" #include "uthash_extra.h" /// Get session_t pointer from a pointer to a member of session_t @@ -177,8 +178,17 @@ static inline struct managed_win *find_win_all(session_t *ps, const xcb_window_t /// 4. otherwise, we schedule a render for that target frame. we use past statistics about /// how long our renders took to figure out when to start rendering. we start rendering /// at the latest point of time possible to still hit the target frame. -void schedule_render(session_t *ps) { +/// +/// The `triggered_by_vblank` parameter is used to indicate whether this function is +/// triggered by a steady timer, i.e. we are rendering for each vblank. The other case is +/// when we stop rendering for a while because there are no changes on screen, then +/// something changed and schedule_render is triggered by a DamageNotify.
The idea is that +/// when the schedule is triggered by a steady timer, schedule_render will be called at a +/// predictable offset into each vblank. + +void schedule_render(session_t *ps, bool triggered_by_vblank) { double delay_s = 0; + ps->next_render = 0; if (!ps->frame_pacing || !ps->redirected) { // Not doing frame pacing, schedule a render immediately, if not already // scheduled. @@ -205,59 +215,77 @@ void schedule_render(session_t *ps) { // We missed our target, push it back one frame ps->target_msc = ps->last_msc + 1; } + log_trace("Still rendering for target frame %ld, not scheduling another " + "render", + ps->target_msc); return; } if (ps->target_msc > ps->last_msc) { - // Render for the target frame is completed, and has yet to be displayed. + // Render for the target frame is completed, but is yet to be displayed. // Don't schedule another render. + log_trace("Target frame %ld is in the future, and we have already " + "rendered, last msc: %d", + ps->target_msc, (int)ps->last_msc); return; } struct timespec now; clock_gettime(CLOCK_MONOTONIC, &now); auto now_us = (uint64_t)now.tv_sec * 1000000 + (uint64_t)now.tv_nsec / 1000; + if (triggered_by_vblank) { + log_trace("vblank schedule delay: %ld us", now_us - ps->last_msc_instant); + } int render_time_us = (int)(render_time.tv_sec * 1000000L + render_time.tv_nsec / 1000L); if (ps->target_msc == ps->last_msc) { // The frame has just been displayed, record its render time; - log_trace("Last render call took: %d us, rolling_max: %d us", - render_time_us, rolling_max_get_max(ps->render_stats)); - rolling_max_push(ps->render_stats, render_time_us); + log_trace("Last render call took: %d (gpu) + %d (cpu) us, " + "last_msc: %" PRIu64, + render_time_us, (int)ps->last_schedule_delay, ps->last_msc); + render_statistics_add_render_time_sample( + &ps->render_stats, render_time_us + (int)ps->last_schedule_delay); ps->target_msc = 0; + ps->last_schedule_delay = 0; } - // TODO(yshui): why 1500? 
- const int SLACK = 1500; - int render_time_estimate = rolling_max_get_max(ps->render_stats); - auto frame_time = (uint64_t)rolling_avg_get_avg(ps->frame_time); - if (render_time_estimate < 0 || frame_time == 0) { - // We don't have render time, and/or frame time estimates, maybe there's + unsigned int divisor = 0; + auto render_budget = render_statistics_get_budget(&ps->render_stats, &divisor); + auto frame_time = render_statistics_get_vblank_time(&ps->render_stats); + if (frame_time == 0) { + // We don't have enough data for render time estimates, maybe there's // no frame rendered yet, or the backend doesn't support render timing // information, schedule render immediately. ps->target_msc = ps->last_msc + 1; goto schedule; } - render_time_estimate += SLACK; - auto minimal_target_us = now_us + (uint64_t)render_time_estimate; - auto frame_delay = (uint64_t)ceil( - (double)(minimal_target_us - ps->last_msc_instant) / (double)frame_time); - auto deadline = ps->last_msc_instant + frame_delay * frame_time; - - ps->target_msc = ps->last_msc + frame_delay; - auto delay = deadline - (uint64_t)render_time_estimate - now_us; - delay_s = (double)delay / 1000000.0; - if (delay_s > 1) { - log_warn("Delay too long: %f s, render_time: %d us, frame_time: %" PRIu64 - " us, now_us: %" PRIu64 " us, next_msc: %" PRIu64 " us", - delay_s, render_time_estimate, frame_time, now_us, deadline); + const auto deadline = ps->last_msc_instant + (unsigned long)divisor * frame_time; + unsigned int available = 0; + if (deadline > now_us) { + available = (unsigned int)(deadline - now_us); } - log_trace("Delay: %lu us, last_msc: %" PRIu64 ", render_time: %d, frame_time: " - "%" PRIu64 ", now_us: %" PRIu64 ", next_msc: %" PRIu64, - delay, ps->last_msc_instant, render_time_estimate, frame_time, now_us, - deadline); + ps->target_msc = ps->last_msc + divisor; + if (available > render_budget) { + delay_s = (double)(available - render_budget) / 1000000.0; + ps->next_render = deadline - render_budget; + 
} else { + delay_s = 0; + ps->next_render = now_us; + } + if (delay_s > 1) { + log_warn("Delay too long: %f s, render_budget: %d us, frame_time: " + "%" PRIu32 " us, now_us: %" PRIu64 " us, next_msc: %" PRIu64 " u" + "s", + delay_s, render_budget, frame_time, now_us, deadline); + } + + log_trace("Delay: %.6lf s, last_msc: %" PRIu64 ", render_budget: %d, frame_time: " + "%" PRIu32 ", now_us: %" PRIu64 ", next_msc: %" PRIu64 ", " + "target_msc: %" PRIu64 ", divisor: %d", + delay_s, ps->last_msc_instant, render_budget, frame_time, now_us, + deadline, ps->target_msc, divisor); schedule: assert(!ev_is_active(&ps->draw_timer)); @@ -282,7 +310,7 @@ void queue_redraw(session_t *ps) { // If frame pacing is not enabled, pretend this is false. // If --benchmark is used, redraw is always queued if (!ps->redraw_needed && !ps->o.benchmark) { - schedule_render(ps); + schedule_render(ps, false); } ps->redraw_needed = true; } @@ -758,7 +786,6 @@ static void configure_root(session_t *ps) { } force_repaint(ps); } - return; } static void handle_root_flags(session_t *ps) { @@ -1439,10 +1466,9 @@ static bool redirect_start(session_t *ps) { // states. 
ps->last_msc_instant = 0; ps->last_msc = 0; - ps->last_render = 0; + ps->last_schedule_delay = 0; ps->target_msc = 0; - rolling_avg_reset(ps->frame_time); - rolling_max_reset(ps->render_stats); + render_statistics_reset(&ps->render_stats); } else if (ps->frame_pacing) { log_error("Present extension is not supported, frame pacing disabled."); ps->frame_pacing = false; @@ -1543,11 +1569,12 @@ handle_present_complete_notify(session_t *ps, xcb_present_complete_notify_event_ if (ps->last_msc_instant != 0) { auto frame_count = cne->msc - ps->last_msc; int frame_time = (int)((cne->ust - ps->last_msc_instant) / frame_count); - rolling_avg_push(ps->frame_time, frame_time); - log_trace("Frame count %lu, frame time: %d us, rolling average: %lf us, " + render_statistics_add_vblank_time_sample(&ps->render_stats, frame_time); + log_trace("Frame count %lu, frame time: %d us, rolling average: %u us, " "msc: %" PRIu64 ", offset: %d us", - frame_count, frame_time, rolling_avg_get_avg(ps->frame_time), - cne->ust, (int)drift); + frame_count, frame_time, + render_statistics_get_vblank_time(&ps->render_stats), cne->ust, + (int)drift); } else if (drift > 1000000 && ps->frame_pacing) { // This is the first MSC event we receive, let's check if the timestamps // align with the monotonic clock. If not, disable frame pacing because we @@ -1560,7 +1587,7 @@ handle_present_complete_notify(session_t *ps, xcb_present_complete_notify_event_ ps->last_msc_instant = cne->ust; ps->last_msc = cne->msc; if (ps->redraw_needed) { - schedule_render(ps); + schedule_render(ps, true); } } @@ -1651,6 +1678,8 @@ static void tmout_unredir_callback(EV_P attr_unused, ev_timer *w, int revents at } static void fade_timer_callback(EV_P attr_unused, ev_timer *w, int revents attr_unused) { + // TODO(yshui): do we still need the fade timer? we queue redraw automatically in + // draw_callback_impl if animation is running. 
session_t *ps = session_ptr(w, fade_timer); queue_redraw(ps); } @@ -1778,12 +1807,6 @@ static void draw_callback_impl(EV_P_ session_t *ps, int revents attr_unused) { } log_trace("Render end"); - struct timespec now; - clock_gettime(CLOCK_MONOTONIC, &now); - uint64_t now_us = - (uint64_t)now.tv_sec * 1000000ULL + (uint64_t)now.tv_nsec / 1000; - ps->last_render = now_us; - ps->first_frame = false; paint++; if (ps->o.benchmark && paint >= ps->o.benchmark) { @@ -2004,8 +2027,7 @@ static session_t *session_init(int argc, char **argv, Display *dpy, pixman_region32_init(&ps->screen_reg); // TODO(yshui) investigate what's the best window size - ps->render_stats = rolling_max_new(128); - ps->frame_time = rolling_avg_new(16); + render_statistics_init(&ps->render_stats, 128); ps->pending_reply_tail = &ps->pending_reply_head; @@ -2642,8 +2664,7 @@ static void session_destroy(session_t *ps) { free(ps->o.glx_fshader_win_str); x_free_randr_info(ps); - rolling_avg_destroy(ps->frame_time); - rolling_max_destroy(ps->render_stats); + render_statistics_destroy(&ps->render_stats); // Release custom window shaders free(ps->o.window_shader_fg); diff --git a/src/statistics.c b/src/statistics.c new file mode 100644 index 00000000..5e3d9249 --- /dev/null +++ b/src/statistics.c @@ -0,0 +1,100 @@ +//! Rendering statistics +//! +//! Tracks how long it takes to render a frame, for measuring performance, and for pacing +//! the frames. 
+ +#include "statistics.h" +#include "log.h" +#include "utils.h" + +void render_statistics_init(struct render_statistics *rs, int window_size) { + *rs = (struct render_statistics){0}; + + rolling_window_init(&rs->render_times, window_size); + rolling_quantile_init_with_tolerance(&rs->render_time_quantile, window_size, + /* q */ 0.98, /* tolerance */ 0.01); +} + +void render_statistics_add_vblank_time_sample(struct render_statistics *rs, int time_us) { + auto sample_sd = sqrt(cumulative_mean_and_var_get_var(&rs->vblank_time_us)); + auto current_estimate = render_statistics_get_vblank_time(rs); + if (current_estimate != 0 && fabs((double)time_us - current_estimate) > sample_sd * 3) { + // Deviated from the mean by more than 3 sigma (p < 0.003) + log_debug("vblank time outlier: %d %f %f", time_us, rs->vblank_time_us.mean, + cumulative_mean_and_var_get_var(&rs->vblank_time_us)); + // An outlier sample, this could mean things like refresh rate changes, so + // we reset the statistics. This could also be benign, but we like to be + // cautious. + cumulative_mean_and_var_init(&rs->vblank_time_us); + } + + if (rs->vblank_time_us.mean != 0) { + auto nframes_in_10_seconds = + (unsigned int)(10. * 1000000. / rs->vblank_time_us.mean); + if (rs->vblank_time_us.n > 20 && rs->vblank_time_us.n > nframes_in_10_seconds) { + // We collected 10 seconds worth of samples, we assume the + // estimated refresh rate is stable. We will still reset the + // statistics if we get an outlier sample though, see above. 
+ return; + } + } + cumulative_mean_and_var_update(&rs->vblank_time_us, time_us); +} + +void render_statistics_add_render_time_sample(struct render_statistics *rs, int time_us) { + int oldest; + if (rolling_window_push_back(&rs->render_times, time_us, &oldest)) { + rolling_quantile_pop_front(&rs->render_time_quantile, oldest); + } + + rolling_quantile_push_back(&rs->render_time_quantile, time_us); +} + +/// How much time budget we should give to the backend for rendering, in microseconds. +/// +/// A `divisor` is also returned, indicating the target framerate. The divisor is +/// the number of vblanks we should wait between each frame. A divisor of 1 means +/// full framerate, 2 means half framerate, etc. +unsigned int +render_statistics_get_budget(struct render_statistics *rs, unsigned int *divisor) { + if (rs->render_times.nelem < rs->render_times.window_size) { + // No valid render time estimates yet. Assume maximum budget. + *divisor = 1; + return UINT_MAX; + } + + // N-th percentile of render times, see render_statistics_init for N. + auto render_time_percentile = + rolling_quantile_estimate(&rs->render_time_quantile, &rs->render_times); + auto vblank_time_us = render_statistics_get_vblank_time(rs); + if (vblank_time_us == 0) { + // We don't have a good estimate of the vblank time yet, so we + // assume we can finish in one vblank. + *divisor = 1; + } else { + *divisor = + (unsigned int)(render_time_percentile / rs->vblank_time_us.mean + 1); + } + return (unsigned int)render_time_percentile; +} + +unsigned int render_statistics_get_vblank_time(struct render_statistics *rs) { + if (rs->vblank_time_us.n <= 20 || rs->vblank_time_us.mean < 100) { + // Not enough samples yet, or the vblank time is too short to be + // meaningful. Assume maximum budget. 
+ return 0; + } + return (unsigned int)rs->vblank_time_us.mean; +} + +void render_statistics_reset(struct render_statistics *rs) { + rolling_window_reset(&rs->render_times); + rolling_quantile_reset(&rs->render_time_quantile); + rs->vblank_time_us = (struct cumulative_mean_and_var){0}; +} + +void render_statistics_destroy(struct render_statistics *rs) { + render_statistics_reset(rs); + rolling_window_destroy(&rs->render_times); + rolling_quantile_destroy(&rs->render_time_quantile); +} diff --git a/src/statistics.h b/src/statistics.h new file mode 100644 index 00000000..67d02119 --- /dev/null +++ b/src/statistics.h @@ -0,0 +1,33 @@ +#pragma once + +#include "utils.h" + +#define NTIERS (3) + +struct render_statistics { + /// Rolling window of rendering times (in us) and the tiers they belong to. + /// We keep track of the tiers because the vblank time estimate can change over + /// time. + struct rolling_window render_times; + /// Estimate the 98-th percentile of rendering times + struct rolling_quantile render_time_quantile; + /// Time between each vblanks + struct cumulative_mean_and_var vblank_time_us; +}; + +void render_statistics_init(struct render_statistics *rs, int window_size); +void render_statistics_reset(struct render_statistics *rs); +void render_statistics_destroy(struct render_statistics *rs); + +void render_statistics_add_vblank_time_sample(struct render_statistics *rs, int time_us); +void render_statistics_add_render_time_sample(struct render_statistics *rs, int time_us); + +/// How much time budget we should give to the backend for rendering, in microseconds. +/// +/// A `divisor` is also returned, indicating the target framerate. The divisor is +/// the number of vblanks we should wait between each frame. A divisor of 1 means +/// full framerate, 2 means half framerate, etc.
+unsigned int +render_statistics_get_budget(struct render_statistics *rs, unsigned int *divisor); + +unsigned int render_statistics_get_vblank_time(struct render_statistics *rs); diff --git a/src/utils.c b/src/utils.c index a1114a04..d26afa33 100644 --- a/src/utils.c +++ b/src/utils.c @@ -48,7 +48,43 @@ int next_power_of_two(int n) { return n; } -/// Track the rolling maximum of a stream of integers. +void rolling_window_destroy(struct rolling_window *rw) { + free(rw->elem); + rw->elem = NULL; +} + +void rolling_window_reset(struct rolling_window *rw) { + rw->nelem = 0; + rw->elem_head = 0; +} + +void rolling_window_init(struct rolling_window *rw, int size) { + rw->elem = ccalloc(size, int); + rw->window_size = size; + rolling_window_reset(rw); +} + +int rolling_window_pop_front(struct rolling_window *rw) { + assert(rw->nelem > 0); + auto ret = rw->elem[rw->elem_head]; + rw->elem_head = (rw->elem_head + 1) % rw->window_size; + rw->nelem--; + return ret; +} + +bool rolling_window_push_back(struct rolling_window *rw, int val, int *front) { + bool full = rw->nelem == rw->window_size; + if (full) { + *front = rolling_window_pop_front(rw); + } + rw->elem[(rw->elem_head + rw->nelem) % rw->window_size] = val; + rw->nelem++; + return full; +} + +/// Track the maximum member of a FIFO queue of integers. Integers are pushed to the back +/// and popped from the front, the maximum of the current members in the queue is +/// tracked. struct rolling_max { /// A priority queue holding the indices of the maximum element candidates. /// The head of the queue is the index of the maximum element. @@ -59,32 +95,26 @@ struct rolling_max { /// it's called the "original" indices. int *p; int p_head, np; - - /// The elemets - int *elem; - int elem_head, nelem; - - int window_size; + /// The maximum number of in flight elements. 
+ int capacity; }; void rolling_max_destroy(struct rolling_max *rm) { - free(rm->elem); free(rm->p); free(rm); } -struct rolling_max *rolling_max_new(int size) { +struct rolling_max *rolling_max_new(int capacity) { auto rm = ccalloc(1, struct rolling_max); if (!rm) { return NULL; } - rm->p = ccalloc(size, int); - rm->elem = ccalloc(size, int); - rm->window_size = size; - if (!rm->p || !rm->elem) { + rm->p = ccalloc(capacity, int); + if (!rm->p) { goto err; } + rm->capacity = capacity; return rm; @@ -96,33 +126,21 @@ err: void rolling_max_reset(struct rolling_max *rm) { rm->p_head = 0; rm->np = 0; - rm->nelem = 0; - rm->elem_head = 0; } -void rolling_max_push(struct rolling_max *rm, int val) { -#define IDX(n) ((n) % rm->window_size) - if (rm->nelem == rm->window_size) { - auto old_head = rm->elem_head; - // Discard the oldest element. - // rm->elem.pop_front(); - rm->nelem--; - rm->elem_head = IDX(rm->elem_head + 1); - - // Remove discarded element from the priority queue too. - assert(rm->np); - if (rm->p[rm->p_head] == old_head) { - // rm->p.pop_front() - rm->p_head = IDX(rm->p_head + 1); - rm->np--; - } +#define IDX(n) ((n) % rm->capacity) +/// Remove the oldest element in the window. The caller must maintain the list of elements +/// themselves, i.e. the behavior is undefined if `front` does not match the oldest +/// element. +void rolling_max_pop_front(struct rolling_max *rm, int front) { + if (rm->p[rm->p_head] == front) { + // rm->p.pop_front() + rm->p_head = IDX(rm->p_head + 1); + rm->np--; } +} - // Add the new element to the queue. - // rm->elem.push_back(val) - rm->elem[IDX(rm->elem_head + rm->nelem)] = val; - rm->nelem++; - +void rolling_max_push_back(struct rolling_max *rm, int val) { // Update the prority queue. // Remove all elements smaller than the new element from the queue.
Because // the new element will become the maximum element before them, and since they @@ -130,7 +148,7 @@ void rolling_max_push(struct rolling_max *rm, int val) { // element, so they will never become the maximum element. while (rm->np) { int p_tail = IDX(rm->p_head + rm->np - 1); - if (rm->elem[rm->p[p_tail]] > val) { + if (rm->p[p_tail] > val) { break; } // rm->p.pop_back() @@ -138,108 +156,119 @@ void rolling_max_push(struct rolling_max *rm, int val) { } // Add the new element to the end of the queue. // rm->p.push_back(rm->start_index + rm->nelem - 1) - rm->p[IDX(rm->p_head + rm->np)] = IDX(rm->elem_head + rm->nelem - 1); + assert(rm->np < rm->capacity); + rm->p[IDX(rm->p_head + rm->np)] = val; rm->np++; -#undef IDX } +#undef IDX int rolling_max_get_max(struct rolling_max *rm) { if (rm->np == 0) { return INT_MIN; } - return rm->elem[rm->p[rm->p_head]]; + return rm->p[rm->p_head]; } TEST_CASE(rolling_max_test) { #define NELEM 15 + struct rolling_window queue; + rolling_window_init(&queue, 3); auto rm = rolling_max_new(3); const int data[NELEM] = {1, 2, 3, 1, 4, 5, 2, 3, 6, 5, 4, 3, 2, 0, 0}; const int expected_max[NELEM] = {1, 2, 3, 3, 4, 5, 5, 5, 6, 6, 6, 5, 4, 3, 2}; int max[NELEM] = {0}; for (int i = 0; i < NELEM; i++) { - rolling_max_push(rm, data[i]); + int front; + bool full = rolling_window_push_back(&queue, data[i], &front); + if (full) { + rolling_max_pop_front(rm, front); + } + rolling_max_push_back(rm, data[i]); max[i] = rolling_max_get_max(rm); } + rolling_window_destroy(&queue); + rolling_max_destroy(rm); TEST_TRUE(memcmp(max, expected_max, sizeof(max)) == 0); #undef NELEM } -/// A rolling average of a stream of integers. -struct rolling_avg { - /// The sum of the elements in the window. - int64_t sum; +// Find the k-th smallest element in an array. 
+int quickselect(int *elems, int nelem, int k) { + int l = 0, r = nelem; // [l, r) is the range of candidates + while (l != r) { + int pivot = elems[l]; + int i = l, j = r; + while (i < j) { + while (i < j && elems[--j] >= pivot) { + } + elems[i] = elems[j]; + while (i < j && elems[++i] <= pivot) { + } + elems[j] = elems[i]; + } + elems[i] = pivot; - /// The elements in the window. - int *elem; - int head, nelem; + if (i == k) { + break; + } - int window_size; -}; - -struct rolling_avg *rolling_avg_new(int size) { - auto rm = ccalloc(1, struct rolling_avg); - if (!rm) { - return NULL; + if (i < k) { + l = i + 1; + } else { + r = i; + } } - - rm->elem = ccalloc(size, int); - rm->window_size = size; - if (!rm->elem) { - free(rm); - return NULL; - } - - return rm; + return elems[k]; } -void rolling_avg_destroy(struct rolling_avg *rm) { - free(rm->elem); - free(rm); +void rolling_quantile_init(struct rolling_quantile *rq, int capacity, int mink, int maxk) { + *rq = (struct rolling_quantile){0}; + rq->tmp_buffer = malloc(sizeof(int) * (size_t)capacity); + rq->capacity = capacity; + rq->min_target_rank = mink; + rq->max_target_rank = maxk; } -void rolling_avg_reset(struct rolling_avg *ra) { - ra->sum = 0; - ra->nelem = 0; - ra->head = 0; +void rolling_quantile_init_with_tolerance(struct rolling_quantile *rq, int window_size, + double target, double tolerance) { + rolling_quantile_init(rq, window_size, (int)((target - tolerance) * window_size), + (int)((target + tolerance) * window_size)); } -void rolling_avg_push(struct rolling_avg *ra, int val) { - if (ra->nelem == ra->window_size) { - // Discard the oldest element. 
- // rm->elem.pop_front(); - ra->sum -= ra->elem[ra->head % ra->window_size]; - ra->nelem--; - ra->head = (ra->head + 1) % ra->window_size; +void rolling_quantile_reset(struct rolling_quantile *rq) { + rq->current_rank = 0; + rq->estimate = 0; +} + +void rolling_quantile_destroy(struct rolling_quantile *rq) { + free(rq->tmp_buffer); +} + +int rolling_quantile_estimate(struct rolling_quantile *rq, struct rolling_window *elements) { + if (rq->current_rank < rq->min_target_rank || rq->current_rank > rq->max_target_rank) { + if (elements->nelem != elements->window_size) { + return INT_MIN; + } + // Re-estimate the quantile. + assert(elements->nelem <= rq->capacity); + rolling_window_copy_to_array(elements, rq->tmp_buffer); + const int target_rank = + rq->min_target_rank + (rq->max_target_rank - rq->min_target_rank) / 2; + rq->estimate = quickselect(rq->tmp_buffer, elements->nelem, target_rank); + rq->current_rank = target_rank; } - - // Add the new element to the queue. - // rm->elem.push_back(val) - ra->elem[(ra->head + ra->nelem) % ra->window_size] = val; - ra->sum += val; - ra->nelem++; + return rq->estimate; } -double rolling_avg_get_avg(struct rolling_avg *ra) { - if (ra->nelem == 0) { - return 0; +void rolling_quantile_push_back(struct rolling_quantile *rq, int x) { + if (x <= rq->estimate) { + rq->current_rank++; } - return (double)ra->sum / (double)ra->nelem; } -TEST_CASE(rolling_avg_test) { -#define NELEM 15 - auto rm = rolling_avg_new(3); - const int data[NELEM] = {1, 2, 3, 1, 4, 5, 2, 3, 6, 5, 4, 3, 2, 0, 0}; - const double expected_avg[NELEM] = { - 1, 1.5, 2, 2, 8.0 / 3.0, 10.0 / 3.0, 11.0 / 3.0, 10.0 / 3.0, - 11.0 / 3.0, 14.0 / 3.0, 5, 4, 3, 5.0 / 3.0, 2.0 / 3.0}; - double avg[NELEM] = {0}; - for (int i = 0; i < NELEM; i++) { - rolling_avg_push(rm, data[i]); - avg[i] = rolling_avg_get_avg(rm); - } - for (int i = 0; i < NELEM; i++) { - TEST_EQUAL(avg[i], expected_avg[i]); +void rolling_quantile_pop_front(struct rolling_quantile *rq, int x) { + if (x <= 
rq->estimate) { + rq->current_rank--; } } diff --git a/src/utils.h b/src/utils.h index b7c45291..fc6dd306 100644 --- a/src/utils.h +++ b/src/utils.h @@ -17,6 +17,7 @@ #include #include "compiler.h" +#include "log.h" #include "types.h" #define ARR_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) @@ -289,20 +290,97 @@ static inline void free_charpp(char **str) { /// int next_power_of_two(int n); +struct rolling_window { + int *elem; + int elem_head, nelem; + int window_size; +}; + +void rolling_window_destroy(struct rolling_window *rw); +void rolling_window_reset(struct rolling_window *rw); +void rolling_window_init(struct rolling_window *rw, int size); +int rolling_window_pop_front(struct rolling_window *rw); +bool rolling_window_push_back(struct rolling_window *rw, int val, int *front); + +/// Copy the contents of the rolling window to an array. The array is assumed to +/// have enough space to hold the contents of the rolling window. +static inline void attr_unused rolling_window_copy_to_array(struct rolling_window *rw, + int *arr) { + // The length from head to the end of the array + auto head_len = (size_t)(rw->window_size - rw->elem_head); + if (head_len >= (size_t)rw->nelem) { + memcpy(arr, rw->elem + rw->elem_head, sizeof(int) * (size_t)rw->nelem); + } else { + auto tail_len = (size_t)((size_t)rw->nelem - head_len); + memcpy(arr, rw->elem + rw->elem_head, sizeof(int) * head_len); + memcpy(arr + head_len, rw->elem, sizeof(int) * tail_len); + } +} + struct rolling_max; -struct rolling_max *rolling_max_new(int window_size); +struct rolling_max *rolling_max_new(int capacity); void rolling_max_destroy(struct rolling_max *rm); void rolling_max_reset(struct rolling_max *rm); -void rolling_max_push(struct rolling_max *rm, int val); +void rolling_max_pop_front(struct rolling_max *rm, int front); +void rolling_max_push_back(struct rolling_max *rm, int val); int rolling_max_get_max(struct rolling_max *rm); -struct rolling_avg; -struct rolling_avg *rolling_avg_new(int 
window_size); -void rolling_avg_destroy(struct rolling_avg *ra); -void rolling_avg_reset(struct rolling_avg *ra); -void rolling_avg_push(struct rolling_avg *ra, int val); -double rolling_avg_get_avg(struct rolling_avg *ra); +/// Estimate the mean and variance of random variable X using Welford's online +/// algorithm. +struct cumulative_mean_and_var { + double mean; + double m2; + unsigned int n; +}; + +static inline attr_unused void +cumulative_mean_and_var_init(struct cumulative_mean_and_var *cmv) { + *cmv = (struct cumulative_mean_and_var){0}; +} + +static inline attr_unused void +cumulative_mean_and_var_update(struct cumulative_mean_and_var *cmv, double x) { + if (cmv->n == UINT_MAX) { + // We have too many elements, let's keep the mean and variance. + return; + } + cmv->n++; + double delta = x - cmv->mean; + cmv->mean += delta / (double)cmv->n; + cmv->m2 += delta * (x - cmv->mean); +} + +static inline attr_unused double +cumulative_mean_and_var_get_var(struct cumulative_mean_and_var *cmv) { + if (cmv->n < 2) { + return 0; + } + return cmv->m2 / (double)(cmv->n - 1); +} + +// Find the k-th smallest element in an array. +int quickselect(int *elems, int nelem, int k); + +/// A naive quantile estimator. +/// +/// Estimates the N-th percentile of a random variable X in a sliding window. 
+struct rolling_quantile { + int current_rank; + int min_target_rank, max_target_rank; + int estimate; + int capacity; + int *tmp_buffer; +}; + +void rolling_quantile_init(struct rolling_quantile *rq, int capacity, int mink, int maxk); +void rolling_quantile_init_with_tolerance(struct rolling_quantile *rq, int window_size, + double target, double tolerance); +void rolling_quantile_reset(struct rolling_quantile *rq); +void rolling_quantile_destroy(struct rolling_quantile *rq); +int rolling_quantile_estimate(struct rolling_quantile *rq, struct rolling_window *elements); +void rolling_quantile_push_back(struct rolling_quantile *rq, int x); +void rolling_quantile_pop_front(struct rolling_quantile *rq, int x); // Some versions of the Android libc do not have timespec_get(), use // clock_gettime() instead.