Method to read, subset and sample expanded data — sample_controls

This method is used on trial_sequence objects to read, subset and sample expanded data.

sample_controls(
  object,
  p_control,
  period = NULL,
  subset_condition = NULL,
  seed = NULL
)

# S4 method for class 'trial_sequence'
sample_controls(
  object,
  p_control,
  period = NULL,
  subset_condition = NULL,
  seed = NULL
)

Arguments

object

An object of class trial_sequence.

p_control

Probability of selecting a control.

period

An integerish vector of non-zero length to select trial period(s) or NULL (default) to select all trial periods.

subset_condition

A string or NULL (default). subset_condition is translated to a call (when the expanded data is saved as a data.table or in the csv format) or to an SQL query (when the expanded data is saved as a duckdb file).

The operators "==", "!=", ">", ">=", "<", "<=", "%in%", "&", "|" are supported. Numeric vectors can be written as c(1, 2, 3) or 1:3. Variables are not supported.

Note: Make sure numeric vectors written as 1:3 are surrounded by spaces, e.g. a %in% c( 1:4 , 6:9 ), otherwise the code will fail.

seed

An integer seed or NULL (default).

Note: The same seed will return a different result depending on the class of the te_datastore object contained in the trial_sequence object.

Value

An updated trial_sequence object; the sampled data is stored in slot @outcome_data as a te_outcome_data object.

Examples

# create a trial_sequence-class object
trial_itt_dir <- file.path(tempdir(), "trial_itt")
dir.create(trial_itt_dir)
trial_itt <- trial_sequence(estimand = "ITT") |>
  set_data(data = data_censored) |>
  set_outcome_model(adjustment_terms = ~ x1 + x2)

trial_itt_csv <- set_expansion_options(
  trial_itt,
  output = save_to_csv(file.path(trial_itt_dir, "trial_csvs")),
  chunk_size = 500
) |>
  expand_trials()

# sample_controls default behaviour returns all trial_periods
sample_controls(trial_itt_csv, p_control = 0.01)
#> Trial Sequence Object 
#> Estimand: Intention-to-treat 
#> Data 
#> N: 725 observations from 89 patients 
#> Key: <id>
#>         id period treatment    x1           x2    x3        x4   age      age_s
#>      <int>  <int>     <num> <num>        <num> <int>     <num> <num>      <num>
#>   1:     1      0         1     1  1.146148362     0 0.7342030    36 0.08333333
#>   2:     1      1         1     1  0.002200337     0 0.7342030    37 0.16666667
#>  ---                                                                           
#> 724:    99      6         1     1 -0.033762356     1 0.5752681    71 3.00000000
#> 725:    99      7         0     0 -1.340496520     1 0.5752681    72 3.08333333
#>      outcome censored eligible time_of_event  first  am_1  cumA switch
#>        <num>    <int>    <num>         <num> <lgcl> <num> <num>  <num>
#>   1:       0        0        1          9999   TRUE     0     1      0
#>   2:       0        0        0          9999  FALSE     1     2      0
#>  ---                                                                  
#> 724:       0        0        0             7  FALSE     1     4      0
#> 725:       1        0        0             7  FALSE     1     4      1
#>      regime_start time_on_regime eligible0 eligible1    wt
#>             <int>          <num>     <num>     <num> <num>
#>   1:            0              0         1         0     1
#>   2:            0              1         0         1     1
#>  ---                                                      
#> 724:            5              1         0         1     1
#> 725:            7              2         0         1     1
#>  
#> IPW for informative censoring: 
#>  - No weight model specified 
#>  
#> Expansion: 
#> Chunk size: 500 
#> Censor at switch: FALSE 
#> First period: 0 | Last period: Inf 
#>  
#> A TE Datastore CSV object 
#> N: 1558 observations 
#> Periods: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 
#> Path: /tmp/RtmpGCze7e/trial_itt/trial_csvs 
#> Columns: id, trial_period, followup_time, outcome, weight, treatment, x1, x2, assigned_treatment 
#>  
#> Outcome model: 
#> TE Outcome Model Object 
#> Formula: outcome ~ assigned_treatment + x1 + x2 + followup_time + I(followup_time^2) + trial_period + I(trial_period^2) 
#> Treatment_var: assigned_treatment 
#> Adjustment_vars: x1 x2 
#>  
#> Use fit_msm() to fit the outcome model 
#>  
#> Outcome data 
#> N: 28 observations from 21 patients in 6 trial periods 
#> Periods: 0 1 2 3 5 10 
#> Sampling control observations with probability: 0.01 
#>        id trial_period followup_time outcome weight treatment    x1         x2
#>     <int>        <int>         <int>   <int>  <int>     <int> <int>      <num>
#>  1:    15            0             0       1      1         1     0 -0.7365256
#>  2:    32            0             0       1      1         1     1  1.9861380
#> ---                                                                           
#> 27:    74            5            13       0      1         0     0  0.2366130
#> 28:    54           10             1       0      1         0     1 -0.3501371
#>     assigned_treatment sample_weight
#>                  <int>         <num>
#>  1:                  1             1
#>  2:                  1             1
#> ---                                 
#> 27:                  0           100
#> 28:                  0           100

# sample_controls can subset the data before sampling
sample_controls(
  trial_itt_csv,
  p_control = 0.2,
  period = 1:10,
  subset_condition = "followup_time %in% 1:20 & x2 < 1",
)
#> Trial Sequence Object 
#> Estimand: Intention-to-treat 
#> Data 
#> N: 725 observations from 89 patients 
#> Key: <id>
#>         id period treatment    x1           x2    x3        x4   age      age_s
#>      <int>  <int>     <num> <num>        <num> <int>     <num> <num>      <num>
#>   1:     1      0         1     1  1.146148362     0 0.7342030    36 0.08333333
#>   2:     1      1         1     1  0.002200337     0 0.7342030    37 0.16666667
#>  ---                                                                           
#> 724:    99      6         1     1 -0.033762356     1 0.5752681    71 3.00000000
#> 725:    99      7         0     0 -1.340496520     1 0.5752681    72 3.08333333
#>      outcome censored eligible time_of_event  first  am_1  cumA switch
#>        <num>    <int>    <num>         <num> <lgcl> <num> <num>  <num>
#>   1:       0        0        1          9999   TRUE     0     1      0
#>   2:       0        0        0          9999  FALSE     1     2      0
#>  ---                                                                  
#> 724:       0        0        0             7  FALSE     1     4      0
#> 725:       1        0        0             7  FALSE     1     4      1
#>      regime_start time_on_regime eligible0 eligible1    wt
#>             <int>          <num>     <num>     <num> <num>
#>   1:            0              0         1         0     1
#>   2:            0              1         0         1     1
#>  ---                                                      
#> 724:            5              1         0         1     1
#> 725:            7              2         0         1     1
#>  
#> IPW for informative censoring: 
#>  - No weight model specified 
#>  
#> Expansion: 
#> Chunk size: 500 
#> Censor at switch: FALSE 
#> First period: 0 | Last period: Inf 
#>  
#> A TE Datastore CSV object 
#> N: 1558 observations 
#> Periods: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 
#> Path: /tmp/RtmpGCze7e/trial_itt/trial_csvs 
#> Columns: id, trial_period, followup_time, outcome, weight, treatment, x1, x2, assigned_treatment 
#>  
#> Outcome model: 
#> TE Outcome Model Object 
#> Formula: outcome ~ assigned_treatment + x1 + x2 + followup_time + I(followup_time^2) + trial_period + I(trial_period^2) 
#> Treatment_var: assigned_treatment 
#> Adjustment_vars: x1 x2 
#>  
#> Use fit_msm() to fit the outcome model 
#>  
#> Outcome data 
#> N: 123 observations from 24 patients in 10 trial periods 
#> Periods: 1 2 3 4 5 6 7 8 9 10 
#> Subset condition: followup_time %in% 1:20 & x2 < 1 
#> Sampling control observations with probability: 0.2 
#>         id trial_period followup_time outcome weight treatment    x1         x2
#>      <int>        <int>         <int>   <int>  <int>     <int> <int>      <num>
#>   1:    50            1             1       0      1         1     1 -0.3846825
#>   2:    54            1             1       0      1         0     0 -1.4326303
#>  ---                                                                           
#> 122:    54            9            10       0      1         0     1 -0.7796190
#> 123:    54           10             6       0      1         0     1 -0.3501371
#>      assigned_treatment sample_weight
#>                   <int>         <num>
#>   1:                  1             5
#>   2:                  0             5
#>  ---                                 
#> 122:                  0             5
#> 123:                  0             5

# delete after use
unlink(trial_itt_dir, recursive = TRUE)