Herd characteristics

table1(~ daysstorage+yearsrms+herdsize +bulkscc + milkyear_kg + digtemp +digdays + drytempex + dryminutes + comptemp +comphours | system_group, data=metadata)
DIG-only
(N=9)
DIG+SEC
(N=7)
SEC-only
(N=5)
Green
(N=6)
Overall
(N=27)
daysstorage
Mean (SD) 2.39 (3.62) 0.929 (0.607) 1.00 (0.612) 1.00 (0) 1.44 (2.16)
Median [Min, Max] 1.00 [0.667, 12.0] 0.500 [0.500, 2.00] 1.00 [0.500, 2.00] 1.00 [1.00, 1.00] 1.00 [0.500, 12.0]
yearsrms
Mean (SD) 6.56 (4.53) 7.29 (4.96) 6.60 (2.88) 6.83 (2.40) 6.81 (3.80)
Median [Min, Max] 7.00 [2.00, 14.0] 5.00 [2.00, 16.0] 5.00 [4.00, 11.0] 7.50 [4.00, 10.0] 7.00 [2.00, 16.0]
herdsize
Mean (SD) 4220 (2570) 3180 (1430) 1520 (1530) 888 (575) 2710 (2200)
Median [Min, Max] 3250 [1710, 8740] 3250 [520, 5300] 1120 [430, 4150] 825 [240, 1880] 1900 [240, 8740]
bulkscc
Mean (SD) 137 (50.2) 133 (15.9) 175 (58.5) 399 (150) 194 (125)
Median [Min, Max] 130 [72.0, 240] 125 [117, 160] 160 [100, 260] 400 [225, 614] 146 [72.0, 614]
Missing 0 (0%) 0 (0%) 0 (0%) 1 (16.7%) 1 (3.7%)
milkyear_kg
Mean (SD) 12200 (993) 12500 (2410) 13300 (1890) 12800 (1050) 12600 (1600)
Median [Min, Max] 12200 [10600, 13600] 13400 [8280, 15600] 12500 [11800, 16600] 12600 [11300, 14100] 12500 [8280, 16600]
digtemp
Mean (SD) 101 (1.56) 100 (1.17) NA (NA) NA (NA) 101 (1.49)
Median [Min, Max] 101 [99.5, 105] 101 [98.0, 101] NA [NA, NA] NA [NA, NA] 101 [98.0, 105]
Missing 0 (0%) 1 (14.3%) 5 (100%) 6 (100%) 12 (44.4%)
digdays
Mean (SD) 20.6 (2.57) 16.9 (3.14) NA (NA) NA (NA) 19.0 (3.32)
Median [Min, Max] 21.0 [16.0, 25.5] 15.5 [13.0, 21.0] NA [NA, NA] NA [NA, NA] 21.0 [13.0, 25.5]
Missing 0 (0%) 0 (0%) 5 (100%) 6 (100%) 11 (40.7%)
drytempex
Mean (SD) NA (NA) 136 (13.4) 160 (NA) NA (NA) 140 (15.5)
Median [Min, Max] NA [NA, NA] 130 [130, 160] 160 [160, 160] NA [NA, NA] 130 [130, 160]
Missing 9 (100%) 2 (28.6%) 4 (80.0%) 6 (100%) 21 (77.8%)
dryminutes
Mean (SD) 16.0 (NA) 10.8 (7.56) 10.0 (NA) NA (NA) 11.4 (6.50)
Median [Min, Max] 16.0 [16.0, 16.0] 13.0 [3.00, 20.0] 10.0 [10.0, 10.0] NA [NA, NA] 13.0 [3.00, 20.0]
Missing 8 (88.9%) 2 (28.6%) 4 (80.0%) 6 (100%) 20 (74.1%)
comptemp
Mean (SD) NA (NA) NA (NA) 146 (8.61) NA (NA) 146 (8.61)
Median [Min, Max] NA [NA, NA] NA [NA, NA] 143 [140, 156] NA [NA, NA] 143 [140, 156]
Missing 9 (100%) 7 (100%) 2 (40.0%) 6 (100%) 24 (88.9%)
comphours
Mean (SD) NA (NA) NA (NA) 18.3 (6.03) NA (NA) 18.3 (6.03)
Median [Min, Max] NA [NA, NA] NA [NA, NA] 19.0 [12.0, 24.0] NA [NA, NA] 19.0 [12.0, 24.0]
Missing 9 (100%) 7 (100%) 2 (40.0%) 6 (100%) 24 (88.9%)
# --- Model herd size -----

model_herdsize <- lm(herdsize ~ system_group,data=mastitis_rtu)
summary(model_herdsize)
## 
## Call:
## lm(formula = herdsize ~ system_group, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2661.7 -1006.0  -181.7   493.3  4513.3 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)   
## (Intercept)             888.3      743.0   1.196  0.24405   
## system_groupDIG+DRY    2293.4     1012.6   2.265  0.03325 * 
## system_groupSEC-only    627.1     1102.1   0.569  0.57489   
## system_groupDigested   3333.3      959.2   3.475  0.00205 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1820 on 23 degrees of freedom
## Multiple R-squared:  0.3922, Adjusted R-squared:  0.313 
## F-statistic: 4.948 on 3 and 23 DF,  p-value: 0.008534
emmeans(model_herdsize,pairwise~system_group)
## $emmeans
##  system_group emmean  SE df lower.CL upper.CL
##  Green           888 743 23     -649     2425
##  DIG+DRY        3182 688 23     1759     4605
##  SEC-only       1515 814 23     -168     3199
##  Digested       4222 607 23     2967     5477
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast               estimate   SE df t.ratio p.value
##  Green - (DIG+DRY)         -2293 1013 23  -2.265  0.1360
##  Green - (SEC-only)         -627 1102 23  -0.569  0.9403
##  Green - Digested          -3333  959 23  -3.475  0.0103
##  (DIG+DRY) - (SEC-only)     1666 1066 23   1.564  0.4181
##  (DIG+DRY) - Digested      -1040  917 23  -1.134  0.6729
##  (SEC-only) - Digested     -2706 1015 23  -2.666  0.0618
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
Anova(model_herdsize,type="III")
## Warning in printHypothesis(L, rhs, names(b)): one or more coefficients in the hypothesis include
##      arithmetic operators in their names;
##   the printed representation of the hypothesis will be omitted
## Anova Table (Type III tests)
## 
## Response: herdsize
##                Sum Sq Df F value   Pr(>F)   
## (Intercept)   4734817  1  1.4294 0.244052   
## system_group 49169987  3  4.9478 0.008534 **
## Residuals    76188994 23                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Check normality assumption through model's residuals

plot(density(model_herdsize$residuals))

# --- Model bulk tank SCC -----

model_bulkscc <- lm(bulkscc ~ system_group,data=mastitis_rtu)
summary(model_bulkscc)
## 
## Call:
## lm(formula = bulkscc ~ system_group, data = mastitis_rtu)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -174.000  -24.131   -7.373   19.286  215.000 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            399.00      33.70  11.840 5.14e-11 ***
## system_groupDIG+DRY   -266.14      44.12  -6.032 4.53e-06 ***
## system_groupSEC-only  -224.00      47.66  -4.700 0.000109 ***
## system_groupDigested  -262.11      42.03  -6.236 2.82e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 75.36 on 22 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6816, Adjusted R-squared:  0.6382 
## F-statistic:  15.7 on 3 and 22 DF,  p-value: 1.109e-05
emmeans(model_bulkscc,pairwise~system_group)
## $emmeans
##  system_group emmean   SE df lower.CL upper.CL
##  Green           399 33.7 22    329.1      469
##  DIG+DRY         133 28.5 22     73.8      192
##  SEC-only        175 33.7 22    105.1      245
##  Digested        137 25.1 22     84.8      189
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast               estimate   SE df t.ratio p.value
##  Green - (DIG+DRY)        266.14 44.1 22   6.032  <.0001
##  Green - (SEC-only)       224.00 47.7 22   4.700  0.0006
##  Green - Digested         262.11 42.0 22   6.236  <.0001
##  (DIG+DRY) - (SEC-only)   -42.14 44.1 22  -0.955  0.7757
##  (DIG+DRY) - Digested      -4.03 38.0 22  -0.106  0.9996
##  (SEC-only) - Digested     38.11 42.0 22   0.907  0.8014
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
Anova(model_bulkscc,type="III")
## Warning in printHypothesis(L, rhs, names(b)): one or more coefficients in the hypothesis include
##      arithmetic operators in their names;
##   the printed representation of the hypothesis will be omitted
## Anova Table (Type III tests)
## 
## Response: bulkscc
##              Sum Sq Df F value    Pr(>F)    
## (Intercept)  796005  1  140.18 5.144e-11 ***
## system_group 267449  3   15.70 1.109e-05 ***
## Residuals    124926 22                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Check normality assumption through model's residuals

plot(density(model_bulkscc$residuals))

# --- Model milk year ----

model_milkyear_kg <- lm(milkyear_kg ~ system_group,data=mastitis_rtu)
summary(model_milkyear_kg)
## 
## Call:
## lm(formula = milkyear_kg ~ system_group, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4257.3  -927.8   -71.7  1102.2  3291.8 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           12775.2      673.9  18.957 1.55e-15 ***
## system_groupDIG+DRY    -239.9      918.4  -0.261    0.796    
## system_groupSEC-only    489.1      999.5   0.489    0.629    
## system_groupDigested   -617.5      870.0  -0.710    0.485    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1651 on 23 degrees of freedom
## Multiple R-squared:  0.06254,    Adjusted R-squared:  -0.05974 
## F-statistic: 0.5114 on 3 and 23 DF,  p-value: 0.6784
emmeans(model_milkyear_kg,pairwise~system_group)
## $emmeans
##  system_group emmean  SE df lower.CL upper.CL
##  Green         12775 674 23    11381    14169
##  DIG+DRY       12535 624 23    11245    13826
##  SEC-only      13264 738 23    11737    14791
##  Digested      12158 550 23    11019    13296
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast               estimate   SE df t.ratio p.value
##  Green - (DIG+DRY)           240  918 23   0.261  0.9936
##  Green - (SEC-only)         -489 1000 23  -0.489  0.9607
##  Green - Digested            618  870 23   0.710  0.8922
##  (DIG+DRY) - (SEC-only)     -729  967 23  -0.754  0.8739
##  (DIG+DRY) - Digested        378  832 23   0.454  0.9682
##  (SEC-only) - Digested      1107  921 23   1.202  0.6318
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
Anova(model_milkyear_kg,type="III")
## Warning in printHypothesis(L, rhs, names(b)): one or more coefficients in the hypothesis include
##      arithmetic operators in their names;
##   the printed representation of the hypothesis will be omitted
## Anova Table (Type III tests)
## 
## Response: milkyear_kg
##                 Sum Sq Df  F value    Pr(>F)    
## (Intercept)  979233166  1 359.3851 1.548e-15 ***
## system_group   4180605  3   0.5114    0.6784    
## Residuals     62669156 23                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Check normality assumption through model's residuals

plot(density(model_milkyear_kg$residuals))

# --- Model years rms -----

model_yearsrms <- lm(yearsrms ~ system_group,data=mastitis_rtu)
summary(model_yearsrms)
## 
## Call:
## lm(formula = yearsrms ~ system_group, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.2857 -2.8333  0.1667  2.7143  8.7143 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            6.8333     1.6456   4.153 0.000385 ***
## system_groupDIG+DRY    0.4524     2.2425   0.202 0.841902    
## system_groupSEC-only  -0.2333     2.4408  -0.096 0.924667    
## system_groupDigested  -0.2778     2.1244  -0.131 0.897106    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.031 on 23 degrees of freedom
## Multiple R-squared:  0.006355,   Adjusted R-squared:  -0.1233 
## F-statistic: 0.04903 on 3 and 23 DF,  p-value: 0.9853
emmeans(model_yearsrms,pairwise~system_group)
## $emmeans
##  system_group emmean   SE df lower.CL upper.CL
##  Green          6.83 1.65 23     3.43    10.24
##  DIG+DRY        7.29 1.52 23     4.13    10.44
##  SEC-only       6.60 1.80 23     2.87    10.33
##  Digested       6.56 1.34 23     3.78     9.33
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast               estimate   SE df t.ratio p.value
##  Green - (DIG+DRY)       -0.4524 2.24 23  -0.202  0.9970
##  Green - (SEC-only)       0.2333 2.44 23   0.096  0.9997
##  Green - Digested         0.2778 2.12 23   0.131  0.9992
##  (DIG+DRY) - (SEC-only)   0.6857 2.36 23   0.291  0.9912
##  (DIG+DRY) - Digested     0.7302 2.03 23   0.359  0.9837
##  (SEC-only) - Digested    0.0444 2.25 23   0.020  1.0000
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
Anova(model_yearsrms,type="III")
## Warning in printHypothesis(L, rhs, names(b)): one or more coefficients in the hypothesis include
##      arithmetic operators in their names;
##   the printed representation of the hypothesis will be omitted
## Anova Table (Type III tests)
## 
## Response: yearsrms
##              Sum Sq Df F value    Pr(>F)    
## (Intercept)  280.17  1  17.244 0.0003849 ***
## system_group   2.39  3   0.049 0.9852604    
## Residuals    373.68 23                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Check normality assumption through model's residuals

plot(density(model_yearsrms$residuals))

# --- Model days storage -----

model_daysstorage <- lm(daysstorage ~ system_group,data=mastitis_rtu)
summary(model_daysstorage)
## 
## Call:
## lm(formula = daysstorage ~ system_group, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7222 -0.6944 -0.3889  0.0000  9.6111 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)
## (Intercept)           1.000e+00  8.880e-01   1.126    0.272
## system_groupDIG+DRY  -7.143e-02  1.210e+00  -0.059    0.953
## system_groupSEC-only  5.733e-16  1.317e+00   0.000    1.000
## system_groupDigested  1.389e+00  1.146e+00   1.211    0.238
## 
## Residual standard error: 2.175 on 23 degrees of freedom
## Multiple R-squared:  0.09979,    Adjusted R-squared:  -0.01763 
## F-statistic: 0.8499 on 3 and 23 DF,  p-value: 0.4809
emmeans(model_daysstorage,pairwise~system_group)
## $emmeans
##  system_group emmean    SE df lower.CL upper.CL
##  Green         1.000 0.888 23   -0.837     2.84
##  DIG+DRY       0.929 0.822 23   -0.772     2.63
##  SEC-only      1.000 0.973 23   -1.012     3.01
##  Digested      2.389 0.725 23    0.889     3.89
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast               estimate   SE df t.ratio p.value
##  Green - (DIG+DRY)        0.0714 1.21 23   0.059  0.9999
##  Green - (SEC-only)       0.0000 1.32 23   0.000  1.0000
##  Green - Digested        -1.3889 1.15 23  -1.211  0.6260
##  (DIG+DRY) - (SEC-only)  -0.0714 1.27 23  -0.056  0.9999
##  (DIG+DRY) - Digested    -1.4603 1.10 23  -1.332  0.5527
##  (SEC-only) - Digested   -1.3889 1.21 23  -1.145  0.6664
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
Anova(model_daysstorage,type="III")
## Warning in printHypothesis(L, rhs, names(b)): one or more coefficients in the hypothesis include
##      arithmetic operators in their names;
##   the printed representation of the hypothesis will be omitted
## Anova Table (Type III tests)
## 
## Response: daysstorage
##               Sum Sq Df F value Pr(>F)
## (Intercept)    6.000  1  1.2681 0.2717
## system_group  12.063  3  0.8499 0.4809
## Residuals    108.825 23
# Check normality assumption through model's residuals

plot(density(model_daysstorage$residuals))

Mastitis pathogens

# --- Model coliforms -----

model_coliforms <- lm(Coliforms ~ system_group,data=mastitis_rtu)
summary(model_coliforms)
## 
## Call:
## lm(formula = Coliforms ~ system_group, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2898 -1.1031  0.0596  0.7119  4.1672 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.5318     0.7181   6.311 1.94e-06 ***
## system_groupDIG+DRY   -3.0000     0.9786  -3.066  0.00548 ** 
## system_groupSEC-only  -1.4945     1.0651  -1.403  0.17391    
## system_groupDigested  -1.2419     0.9270  -1.340  0.19343    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.759 on 23 degrees of freedom
## Multiple R-squared:  0.2953, Adjusted R-squared:  0.2034 
## F-statistic: 3.213 on 3 and 23 DF,  p-value: 0.04173
emmeans(model_coliforms,pairwise~system_group)
## $emmeans
##  system_group emmean    SE df lower.CL upper.CL
##  Green          4.53 0.718 23    3.046     6.02
##  DIG+DRY        1.53 0.665 23    0.157     2.91
##  SEC-only       3.04 0.787 23    1.410     4.66
##  Digested       3.29 0.586 23    2.077     4.50
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast               estimate    SE df t.ratio p.value
##  Green - (DIG+DRY)         3.000 0.979 23   3.066  0.0262
##  Green - (SEC-only)        1.495 1.065 23   1.403  0.5101
##  Green - Digested          1.242 0.927 23   1.340  0.5481
##  (DIG+DRY) - (SEC-only)   -1.505 1.030 23  -1.462  0.4757
##  (DIG+DRY) - Digested     -1.758 0.886 23  -1.983  0.2232
##  (SEC-only) - Digested    -0.253 0.981 23  -0.257  0.9938
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
Anova(model_coliforms,type="III")
## Warning in printHypothesis(L, rhs, names(b)): one or more coefficients in the hypothesis include
##      arithmetic operators in their names;
##   the printed representation of the hypothesis will be omitted
## Anova Table (Type III tests)
## 
## Response: Coliforms
##               Sum Sq Df F value    Pr(>F)    
## (Intercept)  123.222  1 39.8286 1.936e-06 ***
## system_group  29.822  3  3.2131   0.04173 *  
## Residuals     71.158 23                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Check normality assumption through model's residuals

plot(density(model_coliforms$residuals))

# --- Model Herd size --> coliforms -----

model_coliforms_hs_uni <- lm(Coliforms ~ herdsize_cat,data=mastitis_rtu)
summary(model_coliforms_hs_uni)
## 
## Call:
## lm(formula = Coliforms ~ herdsize_cat, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1918 -1.0930  0.4801  1.3020  2.4229 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             4.1918     0.6080   6.894 3.95e-07 ***
## herdsize_cat1350-3250  -1.2255     0.8599  -1.425   0.1670    
## herdsize_cat>3250      -2.1600     0.8599  -2.512   0.0191 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.824 on 24 degrees of freedom
## Multiple R-squared:  0.2092, Adjusted R-squared:  0.1433 
## F-statistic: 3.174 on 2 and 24 DF,  p-value: 0.05983
model_coliforms_hsh_uni <- lm(Coliforms ~ herdsize_hundred,data=mastitis_rtu)
summary(model_coliforms_hsh_uni)
## 
## Call:
## lm(formula = Coliforms ~ herdsize_hundred, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.8893 -1.0121  0.2191  1.2487  2.6752 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.06484    0.56636   7.177  1.6e-07 ***
## herdsize_hundred -0.03696    0.01636  -2.259   0.0328 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.831 on 25 degrees of freedom
## Multiple R-squared:  0.1695, Adjusted R-squared:  0.1363 
## F-statistic: 5.104 on 1 and 25 DF,  p-value: 0.03284
# --- Model days storage --> coliforms -----

model_coliforms_hs_uni <- lm(Coliforms ~ daysstorage,data=mastitis_rtu)
summary(model_coliforms_hs_uni)
## 
## Call:
## lm(formula = Coliforms ~ daysstorage, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1557 -0.8925  0.2148  1.6999  3.1665 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.8230     0.4605   6.130 2.08e-06 ***
## daysstorage   0.1664     0.1797   0.926    0.363    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.976 on 25 degrees of freedom
## Multiple R-squared:  0.03314,    Adjusted R-squared:  -0.005537 
## F-statistic: 0.8568 on 1 and 25 DF,  p-value: 0.3635
# --- Model years RMS --> coliforms -----

model_coliforms_years_rms_uni <- lm(Coliforms ~ yearsrms,data=mastitis_rtu)
summary(model_coliforms_years_rms_uni)
## 
## Call:
## lm(formula = Coliforms ~ yearsrms, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3470 -0.9527  0.2790  1.6048  3.2223 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  2.85276    0.80380   3.549  0.00156 **
## yearsrms     0.03089    0.10345   0.299  0.76771   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.006 on 25 degrees of freedom
## Multiple R-squared:  0.003554,   Adjusted R-squared:  -0.0363 
## F-statistic: 0.08917 on 1 and 25 DF,  p-value: 0.7677
# --- Model breed --> coliforms -----

model_coliforms_breed_uni <- lm(Coliforms ~ breed,data=mastitis_rtu)
summary(model_coliforms_breed_uni)
## 
## Call:
## lm(formula = Coliforms ~ breed, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6886 -1.1767  0.4253  1.2968  2.8863 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       3.6886     0.4511   8.177 2.14e-08 ***
## breedCrossbread  -1.6584     1.0336  -1.605    0.122    
## breedJersey      -1.7083     0.8832  -1.934    0.065 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.86 on 24 degrees of freedom
## Multiple R-squared:  0.1778, Adjusted R-squared:  0.1093 
## F-statistic: 2.595 on 2 and 24 DF,  p-value: 0.09545
Anova(model_coliforms_breed_uni,type="III")
## Anova Table (Type III tests)
## 
## Response: Coliforms
##              Sum Sq Df F value    Pr(>F)    
## (Intercept) 231.297  1 66.8602 2.139e-08 ***
## breed        17.954  2  2.5949   0.09545 .  
## Residuals    83.026 24                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# --- Model System group + years RMS --> coliforms -----

model_coliforms_hs <- lm(Coliforms ~ system_group+yearsrms ,data=mastitis_rtu)
summary(model_coliforms_hs)
## 
## Call:
## lm(formula = Coliforms ~ system_group + yearsrms, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0826 -1.1134  0.1324  0.7663  4.0437 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           4.22086    0.96591   4.370 0.000244 ***
## system_groupDIG+DRY  -3.02054    0.99599  -3.033 0.006113 ** 
## system_groupSEC-only -1.48391    1.08330  -1.370 0.184568    
## system_groupDigested -1.22930    0.94305  -1.304 0.205870    
## yearsrms              0.04550    0.09253   0.492 0.627764    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.789 on 22 degrees of freedom
## Multiple R-squared:  0.303,  Adjusted R-squared:  0.1763 
## F-statistic: 2.391 on 4 and 22 DF,  p-value: 0.08174
emmeans(model_coliforms_hs,pairwise~system_group)
## $emmeans
##  system_group emmean    SE df lower.CL upper.CL
##  Green          4.53 0.730 22    3.017     6.05
##  DIG+DRY        1.51 0.677 22    0.105     2.92
##  SEC-only       3.05 0.800 22    1.388     4.71
##  Digested       3.30 0.597 22    2.064     4.54
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast               estimate    SE df t.ratio p.value
##  Green - (DIG+DRY)         3.021 0.996 22   3.033  0.0289
##  Green - (SEC-only)        1.484 1.083 22   1.370  0.5305
##  Green - Digested          1.229 0.943 22   1.304  0.5704
##  (DIG+DRY) - (SEC-only)   -1.537 1.049 22  -1.465  0.4747
##  (DIG+DRY) - Digested     -1.791 0.904 22  -1.982  0.2251
##  (SEC-only) - Digested    -0.255 0.998 22  -0.255  0.9940
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
Anova(model_coliforms_hs,type="III")
## Warning in printHypothesis(L, rhs, names(b)): one or more coefficients in the hypothesis include
##      arithmetic operators in their names;
##   the printed representation of the hypothesis will be omitted
## Anova Table (Type III tests)
## 
## Response: Coliforms
##              Sum Sq Df F value    Pr(>F)    
## (Intercept)  61.091  1 19.0953 0.0002445 ***
## system_group 30.237  3  3.1504 0.0453413 *  
## yearsrms      0.774  1  0.2418 0.6277639    
## Residuals    70.384 22                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# --- Model System group + days storage--> coliforms -----

model_coliforms_hs <- lm(Coliforms ~ system_group+daysstorage ,data=mastitis_rtu)
summary(model_coliforms_hs)
## 
## Call:
## lm(formula = Coliforms ~ system_group + daysstorage, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0791 -1.0230  0.1250  0.4712  4.2322 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.3800     0.7408   5.913 5.98e-06 ***
## system_groupDIG+DRY   -2.9891     0.9829  -3.041  0.00599 ** 
## system_groupSEC-only  -1.4945     1.0697  -1.397  0.17629    
## system_groupDigested  -1.4527     0.9603  -1.513  0.14456    
## daysstorage            0.1518     0.1693   0.896  0.37986    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.766 on 22 degrees of freedom
## Multiple R-squared:  0.3201, Adjusted R-squared:  0.1965 
## F-statistic:  2.59 on 4 and 22 DF,  p-value: 0.06484
emmeans(model_coliforms_hs,pairwise~system_group)
## $emmeans
##  system_group emmean    SE df lower.CL upper.CL
##  Green          4.60 0.725 22    3.095     6.10
##  DIG+DRY        1.61 0.673 22    0.214     3.01
##  SEC-only       3.10 0.794 22    1.459     4.75
##  Digested       3.15 0.610 22    1.881     4.41
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast               estimate    SE df t.ratio p.value
##  Green - (DIG+DRY)        2.9891 0.983 22   3.041  0.0284
##  Green - (SEC-only)       1.4945 1.070 22   1.397  0.5142
##  Green - Digested         1.4527 0.960 22   1.513  0.4471
##  (DIG+DRY) - (SEC-only)  -1.4946 1.034 22  -1.445  0.4862
##  (DIG+DRY) - Digested    -1.5364 0.924 22  -1.663  0.3662
##  (SEC-only) - Digested   -0.0418 1.013 22  -0.041  1.0000
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
Anova(model_coliforms_hs,type="III")
## Warning in printHypothesis(L, rhs, names(b)): one or more coefficients in the hypothesis include
##      arithmetic operators in their names;
##   the printed representation of the hypothesis will be omitted
## Anova Table (Type III tests)
## 
## Response: Coliforms
##               Sum Sq Df F value    Pr(>F)    
## (Intercept)  109.093  1 34.9599 5.976e-06 ***
## system_group  28.982  3  3.0958   0.04779 *  
## daysstorage    2.506  1  0.8031   0.37986    
## Residuals     68.651 22                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ----- Model Klebsiella spp. -------

model_klebsiella <- lm(`Klebsiella sp.` ~ system_group,data=mastitis_rtu)
summary(model_klebsiella)
## 
## Call:
## lm(formula = `Klebsiella sp.` ~ system_group, data = mastitis_rtu)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -1.932 -0.524  0.000  0.000  2.163 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            1.9324     0.3977   4.859 6.63e-05 ***
## system_groupDIG+DRY   -1.9324     0.5420  -3.565  0.00164 ** 
## system_groupSEC-only  -1.9324     0.5899  -3.276  0.00332 ** 
## system_groupDigested  -1.4084     0.5135  -2.743  0.01159 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9742 on 23 degrees of freedom
## Multiple R-squared:  0.4076, Adjusted R-squared:  0.3304 
## F-statistic: 5.276 on 3 and 23 DF,  p-value: 0.006453
emmeans(model_klebsiella,pairwise~system_group)
## $emmeans
##  system_group emmean    SE df lower.CL upper.CL
##  Green         1.932 0.398 23    1.110    2.755
##  DIG+DRY       0.000 0.368 23   -0.762    0.762
##  SEC-only      0.000 0.436 23   -0.901    0.901
##  Digested      0.524 0.325 23   -0.148    1.196
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast               estimate    SE df t.ratio p.value
##  Green - (DIG+DRY)         1.932 0.542 23   3.565  0.0083
##  Green - (SEC-only)        1.932 0.590 23   3.276  0.0163
##  Green - Digested          1.408 0.513 23   2.743  0.0526
##  (DIG+DRY) - (SEC-only)    0.000 0.570 23   0.000  1.0000
##  (DIG+DRY) - Digested     -0.524 0.491 23  -1.067  0.7123
##  (SEC-only) - Digested    -0.524 0.543 23  -0.964  0.7707
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
Anova(model_klebsiella,type="III")
## Warning in printHypothesis(L, rhs, names(b)): one or more coefficients in the hypothesis include
##      arithmetic operators in their names;
##   the printed representation of the hypothesis will be omitted
## Anova Table (Type III tests)
## 
## Response: Klebsiella sp.
##              Sum Sq Df F value    Pr(>F)    
## (Intercept)  22.405  1 23.6055 6.627e-05 ***
## system_group 15.024  3  5.2761  0.006453 ** 
## Residuals    21.831 23                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Check normality assumption through model's residuals

plot(density(model_klebsiella$residuals))

# --- Model Herd size --> klebsiella -----

model_klebsiella_hs_uni <- lm(`Klebsiella sp.` ~ herdsize_cat,data=mastitis_rtu)
summary(model_klebsiella_hs_uni)
## 
## Call:
## lm(formula = `Klebsiella sp.` ~ herdsize_cat, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0260 -0.5240 -0.2623 -0.2623  3.0694 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)             1.0260     0.3976   2.581   0.0164 *
## herdsize_cat1350-3250  -0.7637     0.5623  -1.358   0.1870  
## herdsize_cat>3250      -0.5020     0.5623  -0.893   0.3809  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.193 on 24 degrees of freedom
## Multiple R-squared:  0.07356,    Adjusted R-squared:  -0.003647 
## F-statistic: 0.9528 on 2 and 24 DF,  p-value: 0.3998
model_klebsiella_hsh_uni <- lm(`Klebsiella sp.` ~ herdsize_hundred,data=mastitis_rtu)
summary(model_klebsiella_hsh_uni)
## 
## Call:
## lm(formula = `Klebsiella sp.` ~ herdsize_hundred, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7729 -0.6266 -0.5624 -0.5051  3.5668 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)       0.49109    0.37435   1.312    0.201
## herdsize_hundred  0.00417    0.01081   0.386    0.703
## 
## Residual standard error: 1.211 on 25 degrees of freedom
## Multiple R-squared:  0.005914,   Adjusted R-squared:  -0.03385 
## F-statistic: 0.1487 on 1 and 25 DF,  p-value: 0.703
# --- Model days storage --> klebsiella -----

model_klebsiella_hs_uni <- lm(`Klebsiella sp.` ~ daysstorage,data=mastitis_rtu)
summary(model_klebsiella_hs_uni)
## 
## Call:
## lm(formula = `Klebsiella sp.` ~ daysstorage, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6607 -0.6407 -0.6307 -0.2710  3.4647 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.69071    0.28124   2.456   0.0213 *
## daysstorage -0.05997    0.10978  -0.546   0.5897  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.207 on 25 degrees of freedom
## Multiple R-squared:  0.01179,    Adjusted R-squared:  -0.02773 
## F-statistic: 0.2984 on 1 and 25 DF,  p-value: 0.5897
# --- Model years RMS --> klebsiella -----

model_klebsiella_years_rms_uni <- lm(`Klebsiella sp.` ~ yearsrms,data=mastitis_rtu)
summary(model_klebsiella_years_rms_uni)
## 
## Call:
## lm(formula = `Klebsiella sp.` ~ yearsrms, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6866 -0.6438 -0.5838 -0.4638  3.5116 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.72092    0.48573   1.484    0.150
## yearsrms    -0.01714    0.06252  -0.274    0.786
## 
## Residual standard error: 1.212 on 25 degrees of freedom
## Multiple R-squared:  0.002999,   Adjusted R-squared:  -0.03688 
## F-statistic: 0.0752 on 1 and 25 DF,  p-value: 0.7862
# --- Model breed --> klebsiella -----

model_klebsiella_breed_uni <- lm(`Klebsiella sp.` ~ breed,data=mastitis_rtu)
summary(model_klebsiella_breed_uni)
## 
## Call:
## lm(formula = `Klebsiella sp.` ~ breed, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0238 -0.6136 -0.4411 -0.4411  3.0715 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)       0.4411     0.2950   1.495    0.148
## breedCrossbread   0.5827     0.6759   0.862    0.397
## breedJersey       0.3449     0.5775   0.597    0.556
## 
## Residual standard error: 1.216 on 24 degrees of freedom
## Multiple R-squared:  0.03676,    Adjusted R-squared:  -0.04351 
## F-statistic: 0.458 on 2 and 24 DF,  p-value: 0.638
Anova(model_klebsiella_breed_uni,type="III")
## Anova Table (Type III tests)
## 
## Response: Klebsiella sp.
##             Sum Sq Df F value Pr(>F)
## (Intercept)  3.308  1  2.2365 0.1478
## breed        1.355  2  0.4580 0.6380
## Residuals   35.499 24
# ----- Model SSLO ------

model_strepto <- lm(`Streptococcus sp.` ~ system_group,data=mastitis_rtu)
summary(model_strepto)
## 
## Call:
## lm(formula = `Streptococcus sp.` ~ system_group, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3817 -0.6816  0.1860  0.6439  3.4312 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            5.6099     0.5394  10.400 3.63e-10 ***
## system_groupDIG+DRY   -3.2282     0.7351  -4.391 0.000212 ***
## system_groupSEC-only  -1.5333     0.8001  -1.916 0.067833 .  
## system_groupDigested  -0.6221     0.6964  -0.893 0.380957    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.321 on 23 degrees of freedom
## Multiple R-squared:  0.5025, Adjusted R-squared:  0.4376 
## F-statistic: 7.742 on 3 and 23 DF,  p-value: 0.0009485
emmeans(model_strepto,pairwise~system_group)
## $emmeans
##  system_group emmean    SE df lower.CL upper.CL
##  Green          5.61 0.539 23     4.49     6.73
##  DIG+DRY        2.38 0.499 23     1.35     3.41
##  SEC-only       4.08 0.591 23     2.85     5.30
##  Digested       4.99 0.440 23     4.08     5.90
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast               estimate    SE df t.ratio p.value
##  Green - (DIG+DRY)         3.228 0.735 23   4.391  0.0011
##  Green - (SEC-only)        1.533 0.800 23   1.916  0.2491
##  Green - Digested          0.622 0.696 23   0.893  0.8084
##  (DIG+DRY) - (SEC-only)   -1.695 0.774 23  -2.191  0.1557
##  (DIG+DRY) - Digested     -2.606 0.666 23  -3.914  0.0036
##  (SEC-only) - Digested    -0.911 0.737 23  -1.236  0.6109
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
Anova(model_strepto,type="III")
## Warning in printHypothesis(L, rhs, names(b)): one or more coefficients in the hypothesis include
##      arithmetic operators in their names;
##   the printed representation of the hypothesis will be omitted
## Anova Table (Type III tests)
## 
## Response: Streptococcus sp.
##               Sum Sq Df  F value    Pr(>F)    
## (Intercept)  188.827  1 108.1544 3.631e-10 ***
## system_group  40.552  3   7.7423 0.0009485 ***
## Residuals     40.156 23                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Check normality assumption through model's residuals

plot(density(model_strepto$residuals))

# --- Model Herd size --> Streptococcus -----

model_Streptococcus_hs_uni <- lm(`Streptococcus sp.` ~ herdsize_cat,data=mastitis_rtu)
summary(model_Streptococcus_hs_uni)
## 
## Call:
## lm(formula = `Streptococcus sp.` ~ herdsize_cat, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.8201 -1.0604  0.2498  0.9246  2.9758 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             5.1024     0.5756   8.864  4.9e-09 ***
## herdsize_cat1350-3250  -1.1799     0.8140  -1.449    0.160    
## herdsize_cat>3250      -1.2823     0.8140  -1.575    0.128    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.727 on 24 degrees of freedom
## Multiple R-squared:  0.1133, Adjusted R-squared:  0.03935 
## F-statistic: 1.533 on 2 and 24 DF,  p-value: 0.2364
model_Streptococcus_hsh_uni <- lm(`Streptococcus sp.` ~ herdsize_hundred,data=mastitis_rtu)
summary(model_Streptococcus_hsh_uni)
## 
## Call:
## lm(formula = `Streptococcus sp.` ~ herdsize_hundred, data = mastitis_rtu)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.271 -1.503  0.498  1.363  2.635 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.335788   0.555450   7.806 3.67e-08 ***
## herdsize_hundred -0.001996   0.016043  -0.124    0.902    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.796 on 25 degrees of freedom
## Multiple R-squared:  0.000619,   Adjusted R-squared:  -0.03936 
## F-statistic: 0.01548 on 1 and 25 DF,  p-value: 0.902
# --- Model days storage --> Streptococcus -----

model_Streptococcus_hs_uni <- lm(`Streptococcus sp.` ~ daysstorage,data=mastitis_rtu)
summary(model_Streptococcus_hs_uni)
## 
## Call:
## lm(formula = `Streptococcus sp.` ~ daysstorage, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.2919 -1.5184  0.5224  1.3934  2.5224 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.25523    0.41856  10.166  2.3e-10 ***
## daysstorage  0.01832    0.16337   0.112    0.912    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.796 on 25 degrees of freedom
## Multiple R-squared:  0.0005025,  Adjusted R-squared:  -0.03948 
## F-statistic: 0.01257 on 1 and 25 DF,  p-value: 0.9116
# --- Model years RMS --> Streptococcus -----

model_Streptococcus_years_rms_uni <- lm(`Streptococcus sp.` ~ yearsrms,data=mastitis_rtu)
summary(model_Streptococcus_years_rms_uni)
## 
## Call:
## lm(formula = `Streptococcus sp.` ~ yearsrms, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4038 -1.5624  0.6482  1.2883  2.3601 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.49981    0.71816   6.266 1.48e-06 ***
## yearsrms    -0.03201    0.09243  -0.346    0.732    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.792 on 25 degrees of freedom
## Multiple R-squared:  0.004774,   Adjusted R-squared:  -0.03504 
## F-statistic: 0.1199 on 1 and 25 DF,  p-value: 0.732
# --- Model breed --> Streptococcus -----

model_Streptococcus_breed_uni <- lm(`Streptococcus sp.` ~ breed,data=mastitis_rtu)
summary(model_Streptococcus_breed_uni)
## 
## Call:
## lm(formula = `Streptococcus sp.` ~ breed, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.3352 -1.4453  0.4607  1.3492  2.2377 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.3352     0.4370   9.921 5.74e-10 ***
## breedCrossbread  -0.7770     1.0012  -0.776    0.445    
## breedJersey       0.2772     0.8556   0.324    0.749    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.802 on 24 degrees of freedom
## Multiple R-squared:  0.03468,    Adjusted R-squared:  -0.04577 
## F-statistic: 0.4311 on 2 and 24 DF,  p-value: 0.6547
Anova(model_Streptococcus_breed_uni,type="III")
## Anova Table (Type III tests)
## 
## Response: Streptococcus sp.
##             Sum Sq Df F value   Pr(>F)    
## (Intercept) 319.50  1 98.4222 5.74e-10 ***
## breed         2.80  2  0.4311   0.6547    
## Residuals    77.91 24                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ---- Model Staph ------

model_staph <- lm(`Staphylococcus sp.` ~ system_group,data=mastitis_rtu)
summary(model_staph)
## 
## Call:
## lm(formula = `Staphylococcus sp.` ~ system_group, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6352 -0.1888  0.0000  0.0000  2.5409 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)          2.137e-17  2.776e-01   0.000    1.000
## system_groupDIG+DRY  0.000e+00  3.783e-01   0.000    1.000
## system_groupSEC-only 6.352e-01  4.118e-01   1.543    0.137
## system_groupDigested 1.888e-01  3.584e-01   0.527    0.603
## 
## Residual standard error: 0.68 on 23 degrees of freedom
## Multiple R-squared:  0.1206, Adjusted R-squared:  0.005847 
## F-statistic: 1.051 on 3 and 23 DF,  p-value: 0.389
emmeans(model_staph,pairwise~system_group)
## $emmeans
##  system_group emmean    SE df lower.CL upper.CL
##  Green         0.000 0.278 23 -0.57430    0.574
##  DIG+DRY       0.000 0.257 23 -0.53170    0.532
##  SEC-only      0.635 0.304 23  0.00611    1.264
##  Digested      0.189 0.227 23 -0.28013    0.658
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast               estimate    SE df t.ratio p.value
##  Green - (DIG+DRY)         0.000 0.378 23   0.000  1.0000
##  Green - (SEC-only)       -0.635 0.412 23  -1.543  0.4297
##  Green - Digested         -0.189 0.358 23  -0.527  0.9517
##  (DIG+DRY) - (SEC-only)   -0.635 0.398 23  -1.595  0.4008
##  (DIG+DRY) - Digested     -0.189 0.343 23  -0.551  0.9454
##  (SEC-only) - Digested     0.446 0.379 23   1.177  0.6469
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
Anova(model_staph,type="III")
## Warning in printHypothesis(L, rhs, names(b)): one or more coefficients in the hypothesis include
##      arithmetic operators in their names;
##   the printed representation of the hypothesis will be omitted
## Anova Table (Type III tests)
## 
## Response: Staphylococcus sp.
##              Sum Sq Df F value Pr(>F)
## (Intercept)   0.000  1   0.000  1.000
## system_group  1.458  3   1.051  0.389
## Residuals    10.636 23
# Check normality assumption through model's residuals

plot(density(model_staph$residuals))

# --- Model Herd size --> Staphylococcus -----

model_Staphylococcus_hs_uni <- lm(`Staphylococcus sp.` ~ herdsize_cat,data=mastitis_rtu)
summary(model_Staphylococcus_hs_uni)
## 
## Call:
## lm(formula = `Staphylococcus sp.` ~ herdsize_cat, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3529 -0.3529 -0.1888  0.0000  2.8232 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)             0.3529     0.2311   1.527    0.140
## herdsize_cat1350-3250  -0.3529     0.3268  -1.080    0.291
## herdsize_cat>3250      -0.1641     0.3268  -0.502    0.620
## 
## Residual standard error: 0.6932 on 24 degrees of freedom
## Multiple R-squared:  0.04641,    Adjusted R-squared:  -0.03305 
## F-statistic: 0.5841 on 2 and 24 DF,  p-value: 0.5653
model_Staphylococcus_hsh_uni <- lm(`Staphylococcus sp.` ~ herdsize_hundred,data=mastitis_rtu)
summary(model_Staphylococcus_hsh_uni)
## 
## Call:
## lm(formula = `Staphylococcus sp.` ~ herdsize_hundred, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.2819 -0.1978 -0.1598 -0.1334  3.0515 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)      0.112736   0.214384   0.526    0.604
## herdsize_hundred 0.002503   0.006192   0.404    0.690
## 
## Residual standard error: 0.6933 on 25 degrees of freedom
## Multiple R-squared:  0.006492,   Adjusted R-squared:  -0.03325 
## F-statistic: 0.1634 on 1 and 25 DF,  p-value: 0.6895
# --- Model days storage --> Staphylococcus -----

model_Staphylococcus_hs_uni <- lm(`Staphylococcus sp.` ~ daysstorage,data=mastitis_rtu)
summary(model_Staphylococcus_hs_uni)
## 
## Call:
## lm(formula = `Staphylococcus sp.` ~ daysstorage, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.2099 -0.1944 -0.1944 -0.1788  2.9662 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.22542    0.16129   1.398    0.174
## daysstorage -0.03106    0.06295  -0.493    0.626
## 
## Residual standard error: 0.6922 on 25 degrees of freedom
## Multiple R-squared:  0.009643,   Adjusted R-squared:  -0.02997 
## F-statistic: 0.2434 on 1 and 25 DF,  p-value: 0.6261
# --- Model years RMS --> Staphylococcus -----

model_Staphylococcus_years_rms_uni <- lm(`Staphylococcus sp.` ~ yearsrms,data=mastitis_rtu)
summary(model_Staphylococcus_years_rms_uni)
## 
## Call:
## lm(formula = `Staphylococcus sp.` ~ yearsrms, data = mastitis_rtu)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35909 -0.28493 -0.13661 -0.06246  2.92825 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.43324    0.27265   1.589    0.125
## yearsrms    -0.03708    0.03509  -1.057    0.301
## 
## Residual standard error: 0.6805 on 25 degrees of freedom
## Multiple R-squared:  0.04275,    Adjusted R-squared:  0.004462 
## F-statistic: 1.117 on 1 and 25 DF,  p-value: 0.3008
# --- Model breed --> Staphylococcus -----

model_Staphylococcus_breed_uni <- lm(`Staphylococcus sp.` ~ breed,data=mastitis_rtu)
summary(model_Staphylococcus_breed_uni)
## 
## Call:
## lm(formula = `Staphylococcus sp.` ~ breed, data = mastitis_rtu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.2832 -0.1868 -0.1868 -0.1868  2.9893 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)      0.18683    0.17078   1.094    0.285
## breedCrossbread -0.18683    0.39131  -0.477    0.637
## breedJersey      0.09634    0.33437   0.288    0.776
## 
## Residual standard error: 0.7041 on 24 degrees of freedom
## Multiple R-squared:  0.01606,    Adjusted R-squared:  -0.06593 
## F-statistic: 0.1959 on 2 and 24 DF,  p-value: 0.8234
Anova(model_Staphylococcus_breed_uni,type="III")
## Anova Table (Type III tests)
## 
## Response: Staphylococcus sp.
##              Sum Sq Df F value Pr(>F)
## (Intercept)  0.5934  1  1.1968 0.2848
## breed        0.1942  2  0.1959 0.8234
## Residuals   11.8997 24

MAP

Step within system

# -------MAP analysis -------

# Descriptive analysis

# By overall system

table1(~ as.factor(positive)| step, data=map_2_farm_long)
farm_baseline_positive
(N=26)
farm_rtu_positive
(N=26)
Overall
(N=52)
as.factor(positive)
0 9 (34.6%) 23 (88.5%) 32 (61.5%)
1 17 (65.4%) 3 (11.5%) 20 (38.5%)
table1(~ as.factor(positive)| system:step, data=map_2_farm_long)
## Warning in .table1.internal(x = x, labels = labels, groupspan = groupspan, :
## Table has 14 columns. Are you sure this is what you want?
Composted
DIG
DIGDRY
DIGIR
DRYOnly
Green
Overall
farm_baseline_positive
(N=3)
farm_rtu_positive
(N=3)
farm_baseline_positive
(N=9)
farm_rtu_positive
(N=9)
farm_baseline_positive
(N=6)
farm_rtu_positive
(N=6)
farm_baseline_positive
(N=1)
farm_rtu_positive
(N=1)
farm_baseline_positive
(N=2)
farm_rtu_positive
(N=2)
farm_baseline_positive
(N=5)
farm_rtu_positive
(N=5)
farm_baseline_positive
(N=26)
farm_rtu_positive
(N=26)
as.factor(positive)
0 1 (33.3%) 2 (66.7%) 3 (33.3%) 9 (100%) 3 (50.0%) 6 (100%) 0 (0%) 1 (100%) 1 (50.0%) 2 (100%) 1 (20.0%) 3 (60.0%) 9 (34.6%) 23 (88.5%)
1 2 (66.7%) 1 (33.3%) 6 (66.7%) 0 (0%) 3 (50.0%) 0 (0%) 1 (100%) 0 (0%) 1 (50.0%) 0 (0%) 4 (80.0%) 2 (40.0%) 17 (65.4%) 3 (11.5%)
table1(~ as.factor(positive)| system_group:step, data=map_2_farm_long)
DIG-only
DIG+SEC
SEC-only
GRN
Overall
farm_baseline_positive
(N=9)
farm_rtu_positive
(N=9)
farm_baseline_positive
(N=7)
farm_rtu_positive
(N=7)
farm_baseline_positive
(N=5)
farm_rtu_positive
(N=5)
farm_baseline_positive
(N=5)
farm_rtu_positive
(N=5)
farm_baseline_positive
(N=26)
farm_rtu_positive
(N=26)
as.factor(positive)
0 3 (33.3%) 9 (100%) 3 (42.9%) 7 (100%) 2 (40.0%) 4 (80.0%) 1 (20.0%) 3 (60.0%) 9 (34.6%) 23 (88.5%)
1 6 (66.7%) 0 (0%) 4 (57.1%) 0 (0%) 3 (60.0%) 1 (20.0%) 4 (80.0%) 2 (40.0%) 17 (65.4%) 3 (11.5%)
# ---- MAP ----


map_freq <- as.data.frame(table(map_models$last_step_group,map_models$proc_step,map_models$positive)) %>%
  rename(last_step_group=Var1,proc_step=Var2,positive=Var3)

theme_set(theme_classic())

# Screw press

sp_n_map <- map_freq %>%
  filter(last_step_group == "ScrewPress") %>%
  group_by(proc_step) %>%
  mutate(Total = sum(Freq)) %>%
     mutate(perc_numeric = (Freq / Total) * 100,
  perc_pos = ifelse(perc_numeric == 0 | perc_numeric == 100,
                    as.character(round(perc_numeric, 0)),
                    sprintf("%.1f", perc_numeric))
) %>%
  filter(positive == 1) %>%
  select(!c(positive))

sp_n_map$n_perc <- paste0(sp_n_map$perc_pos,"% ","(",sp_n_map$Freq,"/",sp_n_map$Total,")")

# Digester

digester_n_map <- map_freq %>%
  filter(last_step_group == "Digester") %>%
  group_by(proc_step) %>%
  mutate(Total = sum(Freq)) %>%
   mutate(Total = sum(Freq)) %>%
     mutate(perc_numeric = (Freq / Total) * 100,
  perc_pos = ifelse(perc_numeric == 0 | perc_numeric == 100,
                    as.character(round(perc_numeric, 0)),
                    sprintf("%.1f", perc_numeric))
) %>%
  filter(positive == 1) %>%
  select(!c(positive))

digester_n_map$n_perc <- paste0(digester_n_map$perc_pos,"% ","(",digester_n_map$Freq,"/",digester_n_map$Total,")")


# Secondary

sec_n_map <- map_freq %>%
  filter(last_step_group == "Dry-Comp") %>%
  group_by(proc_step) %>%
  mutate(Total = sum(Freq)) %>%
   mutate(Total = sum(Freq)) %>%
     mutate(perc_numeric = (Freq / Total) * 100,
  perc_pos = ifelse(perc_numeric == 0 | perc_numeric == 100,
                    as.character(round(perc_numeric, 0)),
                    sprintf("%.1f", perc_numeric))
) %>%
  filter(positive == 1) %>%
  select(!c(positive))

sec_n_map$n_perc <- paste0(sec_n_map$perc_pos,"% ","(",sec_n_map$Freq,"/",sec_n_map$Total,")")

# Join 3 systems

map_n <- rbind(sp_n_map,digester_n_map,sec_n_map) %>% select(!c(Freq,Total))

# Pivot wider

map_n_wide <- map_n %>% pivot_wider(id_cols=last_step_group ,names_from = proc_step, values_from = n_perc)

# Regression models

map_models$last_step_group<-relevel(map_models$last_step_group, ref="Digester")

model_map_system <- glm(positive ~ proc_step*last_step_group, data=map_models,family=binomial(link="logit"))
summary(model_map_system)
## 
## Call:
## glm(formula = positive ~ proc_step * last_step_group, family = binomial(link = "logit"), 
##     data = map_models)
## 
## Coefficients:
##                                         Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                               0.5108     0.5164   0.989  0.32256   
## proc_stepPOST                            -2.4567     0.9155  -2.684  0.00728 **
## last_step_groupScrewPress                -0.7520     0.6550  -1.148  0.25093   
## last_step_groupDry-Comp                  -2.0149     0.9369  -2.151  0.03151 * 
## proc_stepPOST:last_step_groupScrewPress   1.3116     1.1182   1.173  0.24082   
## proc_stepPOST:last_step_groupDry-Comp     1.6582     1.5965   1.039  0.29895   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 126.72  on 103  degrees of freedom
## Residual deviance: 109.68  on  98  degrees of freedom
## AIC: 121.68
## 
## Number of Fisher Scoring iterations: 4
emmeans(model_map_system,revpairwise~proc_step|last_step_group,type="response")
## $emmeans
## last_step_group = Digester:
##  proc_step   prob     SE  df asymp.LCL asymp.UCL
##  PRE       0.6250 0.1210 Inf    0.3772     0.821
##  POST      0.1250 0.0827 Inf    0.0314     0.386
## 
## last_step_group = ScrewPress:
##  proc_step   prob     SE  df asymp.LCL asymp.UCL
##  PRE       0.4400 0.0993 Inf    0.2629     0.634
##  POST      0.2000 0.0800 Inf    0.0858     0.400
## 
## last_step_group = Dry-Comp:
##  proc_step   prob     SE  df asymp.LCL asymp.UCL
##  PRE       0.1818 0.1163 Inf    0.0458     0.507
##  POST      0.0909 0.0867 Inf    0.0126     0.438
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale 
## 
## $contrasts
## last_step_group = Digester:
##  contrast   odds.ratio     SE  df null z.ratio p.value
##  POST / PRE     0.0857 0.0785 Inf    1  -2.684  0.0073
## 
## last_step_group = ScrewPress:
##  contrast   odds.ratio     SE  df null z.ratio p.value
##  POST / PRE     0.3182 0.2043 Inf    1  -1.783  0.0745
## 
## last_step_group = Dry-Comp:
##  contrast   odds.ratio     SE  df null z.ratio p.value
##  POST / PRE     0.4500 0.5886 Inf    1  -0.611  0.5415
## 
## Tests are performed on the log odds ratio scale
emmeans_4 <-emmeans(model_map_system,revpairwise~proc_step|last_step_group,type="response")

map_emmeans <- as.data.frame(emmeans_4$emmeans)

# Herd size

model_map_hs_cat <- glm(positive ~ herdsize_cat, data=map_models,family=binomial(link="logit"))
summary(model_map_hs_cat)
## 
## Call:
## glm(formula = positive ~ herdsize_cat, family = binomial(link = "logit"), 
##     data = map_models)
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)
## (Intercept)            -0.5108     0.4216  -1.212    0.226
## herdsize_cat1350-3250  -0.6592     0.5687  -1.159    0.246
## herdsize_cat>3250      -0.2915     0.5378  -0.542    0.588
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 126.72  on 103  degrees of freedom
## Residual deviance: 125.33  on 101  degrees of freedom
## AIC: 131.33
## 
## Number of Fisher Scoring iterations: 4
model_map_hs <- glm(positive ~ herdsize_hundred, data=map_models,family=binomial(link="logit"))
summary(model_map_hs)
## 
## Call:
## glm(formula = positive ~ herdsize_hundred, family = binomial(link = "logit"), 
##     data = map_models)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)  
## (Intercept)      -0.937132   0.379806  -2.467   0.0136 *
## herdsize_hundred  0.002629   0.010143   0.259   0.7955  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 126.72  on 103  degrees of freedom
## Residual deviance: 126.65  on 102  degrees of freedom
## AIC: 130.65
## 
## Number of Fisher Scoring iterations: 4
# Years RMS

model_map_years <- glm(positive ~ yearsrms, data=map_models,family=binomial(link="logit"))
summary(model_map_years)
## 
## Call:
## glm(formula = positive ~ yearsrms, family = binomial(link = "logit"), 
##     data = map_models)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.55704    0.41552  -1.341    0.180
## yearsrms    -0.04486    0.05453  -0.823    0.411
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 126.72  on 103  degrees of freedom
## Residual deviance: 126.03  on 102  degrees of freedom
## AIC: 130.03
## 
## Number of Fisher Scoring iterations: 4
# Final model will not include any confounders

#Estimate p-values

df_map <- as.data.frame(emmeans_4$contrasts)

p_sp_map <- round(subset(df_map,df_map$last_step_group=="ScrewPress")$p.value,3)

p_dig_map <- round(subset(df_map,df_map$last_step_group=="Digester")$p.value,3)

p_sec_map <- round(subset(df_map,df_map$last_step_group=="Dry-Comp")$p.value,3)

# Estimate confidence intervals (OR)

exp(Confint(model_map_system))
##                                           Estimate      2.5 %      97.5 %
## (Intercept)                             1.66666667 0.61893400   4.8987536
## proc_stepPOST                           0.08571429 0.01075721   0.4436999
## last_step_groupScrewPress               0.47142857 0.12473156   1.6709734
## last_step_groupDry-Comp                 0.13333333 0.01626972   0.7308507
## proc_stepPOST:last_step_groupScrewPress 3.71212121 0.44706049  39.9339208
## proc_stepPOST:last_step_groupDry-Comp   5.25000054 0.15545455 123.2297737
# Intraclass correlation coefficient

performance::icc(model_map_system)
## Warning: `model` has no random effects.
## NULL
# Use effect size package to estimate relative risk

or_df_map<- as.data.frame(confint(emmeans_4$contrasts))
or_df_map_p0 <- as.data.frame(emmeans_4$emmeans) %>% filter(proc_step=="PRE") %>% select(1:3)

# Extract odds ratios

or_sp <- subset(or_df_map,or_df_map$last_step_group=="ScrewPress")$odds.ratio
lcl_sp <- subset(or_df_map,or_df_map$last_step_group=="ScrewPress")$asymp.LCL
ucl_sp <- subset(or_df_map,or_df_map$last_step_group=="ScrewPress")$asymp.UCL

or_dig <- subset(or_df_map,or_df_map$last_step_group=="Digester")$odds.ratio
lcl_dig <- subset(or_df_map,or_df_map$last_step_group=="Digester")$asymp.LCL
ucl_dig <- subset(or_df_map,or_df_map$last_step_group=="Digester")$asymp.UCL

or_sec <- subset(or_df_map,or_df_map$last_step_group=="Dry-Comp")$odds.ratio
lcl_sec <- subset(or_df_map,or_df_map$last_step_group=="Dry-Comp")$asymp.LCL
ucl_sec <- subset(or_df_map,or_df_map$last_step_group=="Dry-Comp")$asymp.UCL

# Extract probability in reference group

p0_sp <- subset(or_df_map_p0,or_df_map_p0$last_step_group=="ScrewPress")$prob
p0_dig <- subset(or_df_map_p0,or_df_map_p0$last_step_group=="Digester")$prob
p0_sec <- subset(or_df_map_p0,or_df_map_p0$last_step_group=="Dry-Comp")$prob

# Relative risk screw press

estimate_map_sp <- effectsize::oddsratio_to_riskratio(or_sp,p0_sp)
lower_map_sp <- effectsize::oddsratio_to_riskratio(lcl_sp,p0_sp)
upper_map_sp <- effectsize::oddsratio_to_riskratio(ucl_sp,p0_sp)

# Relative risk digester

estimate_map_dig <- effectsize::oddsratio_to_riskratio(or_dig,p0_dig)
lower_map_dig <- effectsize::oddsratio_to_riskratio(lcl_dig,p0_dig)
upper_map_dig <- effectsize::oddsratio_to_riskratio(ucl_dig,p0_dig)

# Relative risk secondary

estimate_map_sec <- effectsize::oddsratio_to_riskratio(or_sec,p0_sec)
lower_map_sec <- effectsize::oddsratio_to_riskratio(lcl_sec,p0_sec)
upper_map_sec <- effectsize::oddsratio_to_riskratio(ucl_sec,p0_sec)


# Creating DataFrame

models_map_df <- data.frame(
  last_step_group = c("Screw Press", "Digester", "Secondary"),
  estimate = c(estimate_map_sp, estimate_map_dig, estimate_map_sec),
  lower = c(lower_map_sp, lower_map_dig, lower_map_sec),
  upper = c(upper_map_sp, upper_map_dig, upper_map_sec)
)

e_ob_sp <- data.frame(last_step_group="Screw Press",
                      estimate = 1,
                      upper = 1,
                      lower = 1,
                      proc_step="PRE",
                      p_value = 1)

e_ob_dig <- data.frame(last_step_group="Digester",
                       estimate = 1,
                       upper = 1,
                       lower = 1,
                       proc_step="PRE",
                       p_value = 1)

e_ob_sec <- data.frame(last_step_group="Secondary",
                       estimate = 1,
                       upper = 1,
                       lower = 1,
                       proc_step="PRE",
                       p_value = 1)


sp_models <- models_map_df %>% filter(last_step_group=="Screw Press")

sp_models$p_value <- p_sp_map

sp_models$proc_step <- "POST"

sp_models_2_map <- rbind(e_ob_sp,sp_models)

sp_models_2_map
##   last_step_group  estimate    upper     lower proc_step p_value
## 1     Screw Press 1.0000000 1.000000 1.0000000       PRE   1.000
## 2     Screw Press 0.4545455 1.063887 0.1506972      POST   0.075
digester_models <- models_map_df %>% filter(last_step_group=="Digester")

digester_models$p_value <- p_dig_map

digester_models$proc_step <- "POST"

digester_models_2_map <- rbind(e_ob_dig,digester_models)

digester_models_2_map
##   last_step_group estimate     upper     lower proc_step p_value
## 1        Digester      1.0 1.0000000 1.0000000       PRE   1.000
## 2        Digester      0.2 0.7394695 0.0371173      POST   0.007
secondary_models <- models_map_df %>% filter(last_step_group=="Secondary")

secondary_models$p_value <- p_sec_map

secondary_models$proc_step <- "POST"

secondary_models_2_map <- rbind(e_ob_sec,secondary_models)

secondary_models_2_map
##   last_step_group estimate   upper      lower proc_step p_value
## 1       Secondary      1.0 1.00000 1.00000000       PRE   1.000
## 2       Secondary      0.5 3.10671 0.04204598      POST   0.542
# Screw press

sp_map_plot <- ggplot(sp_n_map, aes(x=proc_step, y=perc_numeric, fill=perc_numeric)) + 
  geom_bar(stat = "identity") +
  coord_flip() +
  ylim(0,100) +
  labs(y="Prevalence(%)",title="Screw Press", fill="Processing step") +
  scale_x_discrete(limits = rev) +
  theme(axis.title.y = element_blank(),legend.position = "none") +
  scale_fill_gradient(low="yellow", 
                      high="red", space ="Lab" ,limits=c(0,100)) +
  geom_text(aes(label=n_perc), position=position_dodge(width=0.9), vjust=0.5,hjust=-0.1,size=3)


#  Digester

dig_map_plot <- ggplot(digester_n_map, aes(x=proc_step, y=perc_numeric, fill=perc_numeric)) + 
  geom_bar(stat = "identity") +
  coord_flip() +
  ylim(0,100) +
  labs(y="Prevalence (%)",title="Digester", fill="Processing step") +
  scale_x_discrete(limits = rev) +
  theme(axis.title.y = element_blank(),legend.position = "none")  +
  scale_fill_gradient(low="yellow", 
                      high="red", space ="Lab" ,limits=c(0,100)) +
  geom_text(aes(label=n_perc), position=position_dodge(width=0.9), vjust=0.5,hjust=-0.1,size=3)

# Secondary

sec_map_plot <- ggplot(sec_n_map, aes(x=proc_step, y=perc_numeric, fill=perc_numeric)) + 
  geom_bar(stat = "identity") +
  coord_flip() +
  ylim(0,100) +
  labs(y="Prevalence (%)",title="Secondary", fill="Processing step") +
  scale_x_discrete(limits = rev) +
  theme(axis.title.y = element_blank(),legend.position = "none")  +
  scale_fill_gradient(low="yellow", 
                      high="red", space ="Lab" ,limits=c(0,100)) +
  geom_text(aes(label=n_perc), position=position_dodge(width=0.9), vjust=0.5,hjust=-0.1,size=3) 



# Add IRR and 95% CI


sp_map_rr <- ggplot(sp_models_2_map, aes(x = proc_step, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, position = position_dodge(0.05)) +
  ylim(0, 2) +
  scale_x_discrete(limits = rev(levels(sp_models_2_map$proc_step))) +
  coord_flip() +
  geom_hline(yintercept=1, linetype="dashed", size=0.5) +
  labs(y="RR (%95CI)") +
  theme(axis.text.y=element_blank(),axis.title.y = element_blank()) 
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#+
#geom_text(aes(label = ifelse(p_sp_map < 0.05, "*", "")),
#         hjust = -12, size = 5, color = "black")



#theme(plot.title = element_text(size = 14, face = "italic", hjust = 0.90),axis.text.y=element_blank(),axis.title.y = element_blank()) 


dig_map_rr <- ggplot(digester_models_2_map, aes(x = proc_step, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, position = position_dodge(0.05)) +
  ylim(0, 2) +
  scale_x_discrete(limits = rev(levels(digester_models_2_map$proc_step))) +
  coord_flip() +
  geom_hline(yintercept=1, linetype="dashed", size=0.5) +
  labs(y="RR (%95CI)") +
  theme(axis.text.y=element_blank(),axis.title.y = element_blank()) 
# +
#geom_text(aes(label = ifelse(p_value < 0.05, "*", "")),
#          nudge_x =0, nudge_y = upper_map_dig , hjust = 0, size = 5, color = "black")


sec_map_rr <- ggplot(secondary_models_2_map, aes(x = proc_step, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, position = position_dodge(0.05)) +
  ylim(0, 2) +
  scale_x_discrete(limits = rev(levels(secondary_models_2_map$proc_step))) +
  coord_flip() +
  geom_hline(yintercept=1, linetype="dashed", size=0.5) +
  labs(y="RR (%95CI)") +
  theme(axis.text.y=element_blank(),axis.title.y = element_blank()) + 
  geom_segment(
    aes(x = 1, xend = 1, y = lower, yend = 2),
    position = position_dodge(0.05), arrow = arrow(length = unit(0.2, "cm"))
  )


map_figure_step <- ggarrange( sp_map_plot + rremove("ylab"),
                         sp_map_rr + rremove("ylab") ,
                         dig_map_plot + rremove("ylab") ,
                         dig_map_rr + rremove("ylab"), 
                         sec_map_plot + rremove("ylab"),
                         sec_map_rr + rremove("ylab"),
                         ncol = 2, nrow = 3,align = "h",labels = c("A", "", "C","","E")) 

map_figure_step_annotated <- annotate_figure(map_figure_step, top = text_grob("MAP", 
                                            color = "black", size = 20))


ggsave(plot = last_plot(),"./figures/map_figure_step.png",width = 15, height = 15, units = "cm") 

Overall

# ---- MAP----

map_3 <- as.data.frame(table(map_2_farm_long$step,map_2_farm_long$positive, map_2_farm_long$system_group))


table_map_overall <- map_3 %>%
  rename(system_group="Var3",step="Var1",positive="Var2") %>%
  group_by(step,system_group) %>%
  mutate(total_samples = sum(Freq)) %>%
  filter(positive==1) %>%
mutate(perc_numeric = (Freq / total_samples) * 100,
  perc_pos = ifelse(perc_numeric == 0 | perc_numeric == 100,
                    as.character(round(perc_numeric, 0)),
                    sprintf("%.1f", perc_numeric))
) %>%
  select(!c(positive)) %>%
  filter(perc_pos != "NaN" ) 

table_map_overall$system_group <- factor(table_map_overall$system_group,c("GRN", "DIG-only", "SEC-only","DIG+SEC"))


table_map_overall$n_perc <- paste0(table_map_overall$perc_pos,"% ","(",table_map_overall$Freq,"/",table_map_overall$total_samples,")")


overall_map_slurry <- table_map_overall %>% 
  filter(step=="farm_baseline_positive") %>%
ggplot(aes(x=system_group, y=perc_numeric, fill=perc_numeric)) + 
  geom_bar(stat = "identity") +
  coord_flip() +
  ylim(0,100) +
  labs(y="Prevalence (%)",title="Raw slurry", fill="Processing step") +
  scale_x_discrete(limits = rev) +
  theme(axis.title.y = element_blank(),legend.position = "none")  +
  scale_fill_gradient(low="yellow", 
                      high="red", space ="Lab" ,limits=c(0,100)) +
  geom_text(aes(label=n_perc), position=position_dodge(width=0.9), vjust=0.5,hjust=2,size=3)



overall_map_rtu <- table_map_overall %>% 
  filter(step=="farm_rtu_positive") %>%
  ggplot(aes(x=system_group, y=perc_numeric, fill=perc_numeric)) + 
  geom_bar(stat = "identity") +
  coord_flip() +
  ylim(0,100) +
  labs(y="Prevalence (%)",title="RTU", fill="Processing step") +
  scale_x_discrete(limits = rev) +
  theme(axis.title.y = element_blank(),legend.position = "none")  +
  scale_fill_gradient(low="yellow", 
                      high="red", space ="Lab" ,limits=c(0,100)) +
  geom_text(aes(label=n_perc), position=position_dodge(width=0.9), vjust=0.5,hjust=-0.1,size=3)


map_figure_overall <- ggarrange( overall_map_slurry + rremove("ylab")+rremove("xlab"),
                              overall_map_rtu + rremove("ylab")+rremove("xlab"),
                              ncol = 2, nrow = 1,align = "h",labels = c("A", "B"))

map_figure_overall_annotated <- annotate_figure(map_figure_overall, top = text_grob("MAP", 
                                                 color = "black", size = 20))

map_figure_overall_annotated

ggsave(plot = last_plot(),"./figures/map_figure_overall.png",width = 20, height = 10, units = "cm") 

Salmonella

Step within system

# ----- Salmonella Analysis ------

# Descriptive analysis

table1(~ daysstorage+yearsrms+herdsize| system_group, data=metadata)
DIG-only
(N=9)
DIG+SEC
(N=7)
SEC-only
(N=5)
Green
(N=6)
Overall
(N=27)
daysstorage
Mean (SD) 2.39 (3.62) 0.929 (0.607) 1.00 (0.612) 1.00 (0) 1.44 (2.16)
Median [Min, Max] 1.00 [0.667, 12.0] 0.500 [0.500, 2.00] 1.00 [0.500, 2.00] 1.00 [1.00, 1.00] 1.00 [0.500, 12.0]
yearsrms
Mean (SD) 6.56 (4.53) 7.29 (4.96) 6.60 (2.88) 6.83 (2.40) 6.81 (3.80)
Median [Min, Max] 7.00 [2.00, 14.0] 5.00 [2.00, 16.0] 5.00 [4.00, 11.0] 7.50 [4.00, 10.0] 7.00 [2.00, 16.0]
herdsize
Mean (SD) 4220 (2570) 3180 (1430) 1520 (1530) 888 (575) 2710 (2200)
Median [Min, Max] 3250 [1710, 8740] 3250 [520, 5300] 1120 [430, 4150] 825 [240, 1880] 1900 [240, 8740]
table1(~ as.factor(positive)| step, data=salmonella_2_farm_long)
farm_baseline_positive
(N=26)
farm_rtu_positive
(N=26)
Overall
(N=52)
as.factor(positive)
0 5 (19.2%) 19 (73.1%) 24 (46.2%)
1 21 (80.8%) 7 (26.9%) 28 (53.8%)
table1(~ as.factor(positive)| system:step, data=salmonella_2_farm_long)
## Warning in .table1.internal(x = x, labels = labels, groupspan = groupspan, :
## Table has 14 columns. Are you sure this is what you want?
Composted
DIG
DIGDRY
DIGIR
DRYOnly
Green
Overall
farm_baseline_positive
(N=3)
farm_rtu_positive
(N=3)
farm_baseline_positive
(N=9)
farm_rtu_positive
(N=9)
farm_baseline_positive
(N=6)
farm_rtu_positive
(N=6)
farm_baseline_positive
(N=1)
farm_rtu_positive
(N=1)
farm_baseline_positive
(N=2)
farm_rtu_positive
(N=2)
farm_baseline_positive
(N=5)
farm_rtu_positive
(N=5)
farm_baseline_positive
(N=26)
farm_rtu_positive
(N=26)
as.factor(positive)
0 0 (0%) 2 (66.7%) 2 (22.2%) 6 (66.7%) 3 (50.0%) 6 (100%) 0 (0%) 1 (100%) 0 (0%) 2 (100%) 0 (0%) 2 (40.0%) 5 (19.2%) 19 (73.1%)
1 3 (100%) 1 (33.3%) 7 (77.8%) 3 (33.3%) 3 (50.0%) 0 (0%) 1 (100%) 0 (0%) 2 (100%) 0 (0%) 5 (100%) 3 (60.0%) 21 (80.8%) 7 (26.9%)
table1(~ as.factor(positive)| system_group:step, data=salmonella_2_farm_long)
DIG-only
DIG+SEC
SEC-only
GRN
Overall
farm_baseline_positive
(N=9)
farm_rtu_positive
(N=9)
farm_baseline_positive
(N=7)
farm_rtu_positive
(N=7)
farm_baseline_positive
(N=5)
farm_rtu_positive
(N=5)
farm_baseline_positive
(N=5)
farm_rtu_positive
(N=5)
farm_baseline_positive
(N=26)
farm_rtu_positive
(N=26)
as.factor(positive)
0 2 (22.2%) 6 (66.7%) 3 (42.9%) 7 (100%) 0 (0%) 4 (80.0%) 0 (0%) 2 (40.0%) 5 (19.2%) 19 (73.1%)
1 7 (77.8%) 3 (33.3%) 4 (57.1%) 0 (0%) 5 (100%) 1 (20.0%) 5 (100%) 3 (60.0%) 21 (80.8%) 7 (26.9%)
# ---- Salmnnella ----


salmonella_freq <- as.data.frame(table(salmonella_models$last_step_group,salmonella_models$proc_step,salmonella_models$positive)) %>%
  rename(last_step_group=Var1,proc_step=Var2,positive=Var3)

theme_set(theme_classic())

# Screw press

sp_n_sal <- salmonella_freq %>%
  filter(last_step_group == "ScrewPress") %>%
  group_by(proc_step) %>%
  mutate(Total = sum(Freq)) %>%
mutate(perc_numeric = (Freq / Total) * 100,
  perc_pos = ifelse(perc_numeric == 0 | perc_numeric == 100,
                    as.character(round(perc_numeric, 0)),
                    sprintf("%.1f", perc_numeric))
) %>%
  filter(positive == 1) %>%
  select(!c(positive))

sp_n_sal$n_perc <- paste0(sp_n_sal$perc_pos,"% ","(",sp_n_sal$Freq,"/",sp_n_sal$Total,")")

# Digester

digester_n_sal <- salmonella_freq %>%
  filter(last_step_group == "Digester") %>%
  group_by(proc_step) %>%
  mutate(Total = sum(Freq)) %>%
mutate(perc_numeric = (Freq / Total) * 100,
  perc_pos = ifelse(perc_numeric == 0 | perc_numeric == 100,
                    as.character(round(perc_numeric, 0)),
                    sprintf("%.1f", perc_numeric))
) %>%
  filter(positive == 1) %>%
  select(!c(positive))

digester_n_sal$n_perc <- paste0(digester_n_sal$perc_pos,"% ","(",digester_n_sal$Freq,"/",digester_n_sal$Total,")")


# Secondary

sec_n_sal <- salmonella_freq %>%
  filter(last_step_group == "Dry-Comp") %>%
  group_by(proc_step) %>%
  mutate(Total = sum(Freq)) %>%
  mutate(perc_numeric = (Freq / Total) * 100,
  perc_pos = ifelse(perc_numeric == 0 | perc_numeric == 100,
                    as.character(round(perc_numeric, 0)),
                    sprintf("%.1f", perc_numeric))
) %>%
  filter(positive == 1) %>%
  select(!c(positive))

sec_n_sal$n_perc <- paste0(sec_n_sal$perc_pos,"% ","(",sec_n_sal$Freq,"/",sec_n_sal$Total,")")

# Join 3 systems

salmonella_n <- rbind(sp_n_sal,digester_n_sal,sec_n_sal) %>% select(!c(Freq,Total))

# Pivot wider

salmonella_n_wide <- salmonella_n %>% pivot_wider(id_cols=last_step_group ,names_from = proc_step, values_from = n_perc)

# Regression model

salmonella_models$last_step_group<-relevel(salmonella_models$last_step_group, ref="Digester")

model_salmonella_system <- glm(positive ~ proc_step*last_step_group, data=salmonella_models,family=binomial(link="logit"))
summary(model_salmonella_system)
## 
## Call:
## glm(formula = positive ~ proc_step * last_step_group, family = binomial(link = "logit"), 
##     data = salmonella_models)
## 
## Coefficients:
##                                         Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                               0.5108     0.5164   0.989  0.32256   
## proc_stepPOST                            -3.2189     1.1547  -2.788  0.00531 **
## last_step_groupScrewPress                -0.9163     0.6583  -1.392  0.16394   
## last_step_groupDry-Comp                  -0.3285     0.7958  -0.413  0.67976   
## proc_stepPOST:last_step_groupScrewPress   3.5443     1.2885   2.751  0.00595 **
## proc_stepPOST:last_step_groupDry-Comp     0.7340     1.6733   0.439  0.66093   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 138.59  on 103  degrees of freedom
## Residual deviance: 118.78  on  98  degrees of freedom
## AIC: 130.78
## 
## Number of Fisher Scoring iterations: 5
emmeans(model_salmonella_system,revpairwise~proc_step|last_step_group,type="response")
## $emmeans
## last_step_group = Digester:
##  proc_step   prob     SE  df asymp.LCL asymp.UCL
##  PRE       0.6250 0.1210 Inf   0.37724     0.821
##  POST      0.0625 0.0605 Inf   0.00873     0.335
## 
## last_step_group = ScrewPress:
##  proc_step   prob     SE  df asymp.LCL asymp.UCL
##  PRE       0.4000 0.0980 Inf   0.23048     0.597
##  POST      0.4800 0.0999 Inf   0.29637     0.669
## 
## last_step_group = Dry-Comp:
##  proc_step   prob     SE  df asymp.LCL asymp.UCL
##  PRE       0.5455 0.1501 Inf   0.26806     0.797
##  POST      0.0909 0.0867 Inf   0.01264     0.439
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale 
## 
## $contrasts
## last_step_group = Digester:
##  contrast   odds.ratio     SE  df null z.ratio p.value
##  POST / PRE     0.0400 0.0462 Inf    1  -2.788  0.0053
## 
## last_step_group = ScrewPress:
##  contrast   odds.ratio     SE  df null z.ratio p.value
##  POST / PRE     1.3846 0.7917 Inf    1   0.569  0.5693
## 
## last_step_group = Dry-Comp:
##  contrast   odds.ratio     SE  df null z.ratio p.value
##  POST / PRE     0.0833 0.1009 Inf    1  -2.052  0.0402
## 
## Tests are performed on the log odds ratio scale
emmeans_4 <-emmeans(model_salmonella_system,revpairwise~proc_step|last_step_group,type="response")

sal_emmeans <- as.data.frame(emmeans_4$emmeans)

# Herd size

model_salmonella_hs_cat <- glm(positive ~ herdsize_cat, data=salmonella_models,family=binomial(link="logit"))
summary(model_salmonella_hs_cat)
## 
## Call:
## glm(formula = positive ~ herdsize_cat, family = binomial(link = "logit"), 
##     data = salmonella_models)
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)  
## (Intercept)             0.1671     0.4097   0.408   0.6834  
## herdsize_cat1350-3250  -0.5945     0.5272  -1.128   0.2595  
## herdsize_cat>3250      -1.0833     0.5334  -2.031   0.0422 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 138.59  on 103  degrees of freedom
## Residual deviance: 134.34  on 101  degrees of freedom
## AIC: 140.34
## 
## Number of Fisher Scoring iterations: 4
model_salmonella_hs <- glm(positive ~ herdsize_hundred, data=salmonella_models,family=binomial(link="logit"))
summary(model_salmonella_hs)
## 
## Call:
## glm(formula = positive ~ herdsize_hundred, family = binomial(link = "logit"), 
##     data = salmonella_models)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)  
## (Intercept)       0.03709    0.36008   0.103   0.9180  
## herdsize_hundred -0.01722    0.01045  -1.648   0.0994 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 138.59  on 103  degrees of freedom
## Residual deviance: 135.67  on 102  degrees of freedom
## AIC: 139.67
## 
## Number of Fisher Scoring iterations: 4
# Years RMS

model_salmonella_years <- glm(positive ~ yearsrms, data=salmonella_models,family=binomial(link="logit"))
summary(model_salmonella_years)
## 
## Call:
## glm(formula = positive ~ yearsrms, family = binomial(link = "logit"), 
##     data = salmonella_models)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -0.75590    0.39966  -1.891   0.0586 .
## yearsrms     0.04143    0.04947   0.838   0.4023  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 138.59  on 103  degrees of freedom
## Residual deviance: 137.88  on 102  degrees of freedom
## AIC: 141.88
## 
## Number of Fisher Scoring iterations: 4
# Final model will not include confounders

#Estimate p-values

df_salmonella <- as.data.frame(emmeans_4$contrasts)

p_sp_sal <- round(subset(df_salmonella,df_salmonella$last_step_group=="ScrewPress")$p.value,3)

p_dig_sal <- round(subset(df_salmonella,df_salmonella$last_step_group=="Digester")$p.value,3)

p_sec_sal <- round(subset(df_salmonella,df_salmonella$last_step_group=="Dry-Comp")$p.value,3)

# Estimate confidence intervals (OR)

exp(Confint(model_salmonella_system))
##                                          Estimate       2.5 %     97.5 %
## (Intercept)                              1.666667 0.618934004   4.898754
## proc_stepPOST                            0.040000 0.001946178   0.274080
## last_step_groupScrewPress                0.400000 0.104842206   1.422297
## last_step_groupDry-Comp                  0.720000 0.147169303   3.488908
## proc_stepPOST:last_step_groupScrewPress 34.615385 3.626501266 828.818279
## proc_stepPOST:last_step_groupDry-Comp    2.083333 0.057273753  76.003991
# Intraclass correlation coefficient

performance::icc(model_salmonella_system)
## Warning: `model` has no random effects.
## NULL
# Use effect size package to estimate relative risk

or_df_sal<- as.data.frame(confint(emmeans_4$contrasts))
or_df_sal_p0 <- as.data.frame(emmeans_4$emmeans) %>% filter(proc_step=="PRE") %>% select(1:3)

# Extract odds ratios

or_sp <- subset(or_df_sal,or_df_sal$last_step_group=="ScrewPress")$odds.ratio
lcl_sp <- subset(or_df_sal,or_df_sal$last_step_group=="ScrewPress")$asymp.LCL
ucl_sp <- subset(or_df_sal,or_df_sal$last_step_group=="ScrewPress")$asymp.UCL

or_dig <- subset(or_df_sal,or_df_sal$last_step_group=="Digester")$odds.ratio
lcl_dig <- subset(or_df_sal,or_df_sal$last_step_group=="Digester")$asymp.LCL
ucl_dig <- subset(or_df_sal,or_df_sal$last_step_group=="Digester")$asymp.UCL

or_sec <- subset(or_df_sal,or_df_sal$last_step_group=="Dry-Comp")$odds.ratio
lcl_sec <- subset(or_df_sal,or_df_sal$last_step_group=="Dry-Comp")$asymp.LCL
ucl_sec <- subset(or_df_sal,or_df_sal$last_step_group=="Dry-Comp")$asymp.UCL

# Extract probability in reference group

p0_sp <- subset(or_df_sal_p0,or_df_sal_p0$last_step_group=="ScrewPress")$prob
p0_dig <- subset(or_df_sal_p0,or_df_sal_p0$last_step_group=="Digester")$prob
p0_sec <- subset(or_df_sal_p0,or_df_sal_p0$last_step_group=="Dry-Comp")$prob

# Relative risk screw press

estimate_sal_sp <- effectsize::oddsratio_to_riskratio(or_sp,p0_sp)
lower_sal_sp <- effectsize::oddsratio_to_riskratio(lcl_sp,p0_sp)
upper_sal_sp <- effectsize::oddsratio_to_riskratio(ucl_sp,p0_sp)

# Relative risk digester

estimate_sal_dig <- effectsize::oddsratio_to_riskratio(or_dig,p0_dig)
lower_sal_dig <- effectsize::oddsratio_to_riskratio(lcl_dig,p0_dig)
upper_sal_dig <- effectsize::oddsratio_to_riskratio(ucl_dig,p0_dig)

# Relative risk secondary

estimate_sal_sec <- effectsize::oddsratio_to_riskratio(or_sec,p0_sec)
lower_sal_sec <- effectsize::oddsratio_to_riskratio(lcl_sec,p0_sec)
upper_sal_sec <- effectsize::oddsratio_to_riskratio(ucl_sec,p0_sec)


# Creating DataFrame

models_salmonella_df <- data.frame(
  last_step_group = c("Screw Press", "Digester", "Secondary"),
  estimate = c(estimate_sal_sp, estimate_sal_dig, estimate_sal_sec),
  lower = c(lower_sal_sp, lower_sal_dig, lower_sal_sec),
  upper = c(upper_sal_sp, upper_sal_dig, upper_sal_sec)
)

e_ob_sp <- data.frame(last_step_group="Screw Press",
                                estimate = 1,
                                upper = 1,
                                lower = 1,
                                proc_step="PRE",
                                p_value = 1)

e_ob_dig <- data.frame(last_step_group="Digester",
                      estimate = 1,
                      upper = 1,
                      lower = 1,
                      proc_step="PRE",
                      p_value = 1)

e_ob_sec <- data.frame(last_step_group="Secondary",
                      estimate = 1,
                      upper = 1,
                      lower = 1,
                      proc_step="PRE",
                      p_value = 1)


sp_models <- models_salmonella_df %>% filter(last_step_group=="Screw Press")

sp_models$p_value <- p_sp_sal

sp_models$proc_step <- "POST"

sp_models_2_sal <- rbind(e_ob_sp,sp_models)

sp_models_2_sal
##   last_step_group estimate    upper    lower proc_step p_value
## 1     Screw Press      1.0 1.000000 1.000000       PRE   1.000
## 2     Screw Press      1.2 1.847419 0.578379      POST   0.569
digester_models <- models_salmonella_df %>% filter(last_step_group=="Digester")

digester_models$p_value <- p_dig_sal

digester_models$proc_step <- "POST"

digester_models_2_sal <- rbind(e_ob_dig,digester_models)

digester_models_2_sal
##   last_step_group estimate    upper      lower proc_step p_value
## 1        Digester      1.0 1.000000 1.00000000       PRE   1.000
## 2        Digester      0.1 0.624922 0.01101919      POST   0.005
secondary_models <- models_salmonella_df %>% filter(last_step_group=="Secondary")

secondary_models$p_value <- p_sec_sal

secondary_models$proc_step <- "POST"

secondary_models_2_sal <- rbind(e_ob_sec,secondary_models)

secondary_models_2_sal
##   last_step_group  estimate     upper      lower proc_step p_value
## 1       Secondary 1.0000000 1.0000000 1.00000000       PRE    1.00
## 2       Secondary 0.1666667 0.9492171 0.01691838      POST    0.04
# Screw press

sp_salmonella_plot <- ggplot(sp_n_sal, aes(x=proc_step, y=perc_numeric, fill=perc_numeric)) + 
  geom_bar(stat = "identity") +
  coord_flip() +
  ylim(0,100) +
  labs(y="Prevalence (%)",title="Screw Press", fill="Processing step") +
  scale_x_discrete(limits = rev) +
  theme(axis.title.y = element_blank(),legend.position = "none") +
  scale_fill_gradient(low="yellow", 
                        high="red", space ="Lab" ,limits=c(0,100)) +
  geom_text(aes(label=n_perc), position=position_dodge(width=0.9), vjust=0.5,hjust=-0.1,size=3)


#  Digester

dig_salmonella_plot <- ggplot(digester_n_sal, aes(x=proc_step, y=perc_numeric, fill=perc_numeric)) + 
  geom_bar(stat = "identity") +
  coord_flip() +
  ylim(0,100) +
  labs(y="Prevalence (%)",title="Digester", fill="Processing step") +
  scale_x_discrete(limits = rev) +
  theme(axis.title.y = element_blank(),legend.position = "none")  +
  scale_fill_gradient(low="yellow", 
                      high="red", space ="Lab" ,limits=c(0,100)) +
  geom_text(aes(label=n_perc), position=position_dodge(width=0.9), vjust=0.5,hjust=-0.1,size=3)

# Secondary

sec_salmonella_plot <- ggplot(sec_n_sal, aes(x=proc_step, y=perc_numeric, fill=perc_numeric)) + 
  geom_bar(stat = "identity") +
  coord_flip() +
  ylim(0,100) +
  labs(y="Prevalence (%)",title="Secondary", fill="Processing step") +
  scale_x_discrete(limits = rev) +
  theme(axis.title.y = element_blank(),legend.position = "none")  +
  scale_fill_gradient(low="yellow", 
                      high="red", space ="Lab" ,limits=c(0,100)) +
  geom_text(aes(label=n_perc), position=position_dodge(width=0.9), vjust=0.5,hjust=-0.1,size=3)



# Add IRR and 95% CI


sp_salmonella_rr <- ggplot(sp_models_2_sal, aes(x = proc_step, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, position = position_dodge(0.05)) +
  ylim(0, 2) +
  scale_x_discrete(limits = rev(levels(sp_models_2_sal$proc_step))) +
  coord_flip() +
  geom_hline(yintercept=1, linetype="dashed", size=0.5) +
  labs(y="RR (%95CI)") +
  theme(axis.text.y=element_blank(),axis.title.y = element_blank()) 
#+
  #geom_text(aes(label = ifelse(p_sp_sal < 0.05, "*", "")),
   #         hjust = -12, size = 5, color = "black")



  #theme(plot.title = element_text(size = 14, face = "italic", hjust = 0.90),axis.text.y=element_blank(),axis.title.y = element_blank()) 


dig_salmonella_rr <- ggplot(digester_models_2_sal, aes(x = proc_step, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, position = position_dodge(0.05)) +
  ylim(0, 2) +
  scale_x_discrete(limits = rev(levels(digester_models_2_sal$proc_step))) +
  coord_flip() +
  geom_hline(yintercept=1, linetype="dashed", size=0.5) +
  labs(y="RR (%95CI)") +
  theme(axis.text.y=element_blank(),axis.title.y = element_blank()) 
  # +
  #geom_text(aes(label = ifelse(p_value < 0.05, "*", "")),
  #          nudge_x =0, nudge_y = upper_sal_dig , hjust = 0, size = 5, color = "black")


sec_salmonella_rr <- ggplot(secondary_models_2_sal, aes(x = proc_step, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, position = position_dodge(0.05)) +
  ylim(0, 2) +
  scale_x_discrete(limits = rev(levels(secondary_models_2_sal$proc_step))) +
  coord_flip() +
  geom_hline(yintercept=1, linetype="dashed", size=0.5) +
  labs(y="RR (%95CI)") +
  theme(axis.text.y=element_blank(),axis.title.y = element_blank()) 
#+
#  geom_text(aes(label = ifelse(p_value < 0.05, "*", "")),
 # nudge_x =0, nudge_y = upper_sal_sec , hjust = 0, size = 5, color = "black")


sal_figure_step <- ggarrange( sp_salmonella_plot + rremove("ylab"),
                              sp_salmonella_rr + rremove("ylab"),
                              dig_salmonella_plot + rremove("ylab"),
                              dig_salmonella_rr + rremove("ylab"), 
                              sec_salmonella_plot + rremove("ylab"),
                              sec_salmonella_rr + rremove("ylab"),
                              ncol = 2, nrow = 3,align = "h",labels = c("B", "", "D","","F")) 

sal_figure_step_annotated <- annotate_figure(sal_figure_step, top = text_grob("SAL", 
                                                 color = "black",  size = 20))


ggsave(plot = last_plot(),"./figures/sal_figure_step.png",width = 15, height = 15, units = "cm") 


# Merge figures

all_figures_step <- ggarrange( map_figure_step_annotated + rremove("ylab") + rremove("xlab"),
                              sal_figure_step_annotated + rremove("ylab") + rremove("xlab"),
                              ncol = 2, nrow = 1,align = "h") 

all_figures_step

ggsave(plot = last_plot(),"./figures/all_figure_step.png",width = 30, height = 15, units = "cm") 

Overall

# ---- Salmonella ----

salmonella_3 <- as.data.frame(table(salmonella_2_farm_long$step,salmonella_2_farm_long$positive, salmonella_2_farm_long$system_group))


table_salmonella_overall <- salmonella_3 %>%
  rename(system_group="Var3",step="Var1",positive="Var2") %>%
  group_by(step,system_group) %>%
  mutate(total_samples = sum(Freq)) %>%
  filter(positive==1) %>%
 mutate(perc_numeric = (Freq / total_samples) * 100,
  perc_pos = ifelse(perc_numeric == 0 | perc_numeric == 100,
                    as.character(round(perc_numeric, 0)),
                    sprintf("%.1f", perc_numeric))
) %>%
  select(!c(positive)) %>%
  filter(perc_pos != "NaN" ) 

table_salmonella_overall$system_group <- factor(table_salmonella_overall$system_group,c("GRN", "DIG-only", "SEC-only","DIG+SEC"))


table_salmonella_overall$n_perc <- paste0(table_salmonella_overall$perc_pos,"% ","(",table_salmonella_overall$Freq,"/",table_salmonella_overall$total_samples,")")


overall_sal_slurry <- table_salmonella_overall %>% 
  filter(step=="farm_baseline_positive") %>%
  ggplot(aes(x=system_group, y=perc_numeric, fill=perc_numeric)) + 
  geom_bar(stat = "identity") +
  coord_flip() +
  ylim(0,100) +
  labs(y="Prevalence (%)",title="Raw slurry", fill="Processing step") +
  scale_x_discrete(limits = rev) +
  theme(axis.title.y = element_blank(),legend.position = "none")  +
  scale_fill_gradient(low="yellow", 
                      high="red", space ="Lab" ,limits=c(0,100)) +
  geom_text(aes(label=n_perc), position=position_dodge(width=0.9), vjust=0.5,hjust=2,size=3)


overall_sal_rtu <- table_salmonella_overall %>% 
  filter(step=="farm_rtu_positive") %>%
  ggplot(aes(x=system_group, y=perc_numeric, fill=perc_numeric)) + 
  geom_bar(stat = "identity") +
  coord_flip() +
  ylim(0,100) +
  labs(y="Prevalence (%)",title="RTU", fill="Processing step") +
  scale_x_discrete(limits = rev) +
  theme(axis.title.y = element_blank(),legend.position = "none")  +
  scale_fill_gradient(low="yellow", 
                      high="red", space ="Lab" ,limits=c(0,100)) +
  geom_text(aes(label=n_perc), position=position_dodge(width=0.9), vjust=0.5,hjust=-0.1,size=3)


sal_figure_overall <- ggarrange( overall_sal_slurry + rremove("ylab")+rremove("xlab"),
                                 overall_sal_rtu + rremove("ylab")+rremove("xlab"),
                                 ncol = 2, nrow = 1,align = "h",labels = c("C","D")) 

  sal_figure_overall_annotated <- annotate_figure(sal_figure_overall, top = text_grob("SAL", 
                                                 color = "black",  size = 20))

sal_figure_overall_annotated

ggsave(plot = last_plot(),"./figures/sal_figure_overall.png",width = 20, height = 10, units = "cm")



all_figure_overall <- ggarrange( map_figure_overall_annotated + rremove("ylab") + rremove("xlab"),
                                 sal_figure_overall_annotated + rremove("ylab") + rremove("xlab"),
                              ncol = 1, nrow = 2,align = "h") 

all_figure_overall_annotated <- annotate_figure(all_figure_overall, bottom = text_grob("Prevalence (%)", color = "black",
                                                                                      hjust = 0.75, x = 0.5),
                                                left = text_grob("Processing system", color = "black", rot = 90))

all_figure_overall_annotated

ggsave(plot = last_plot(),"./figures/all_figure_overall.png",width = 30, height = 15, units = "cm") 

Minimum detectable difference

# --- Minimum difference: pre vs post step MAP & SAL ----

# Given parameters
alpha <- 0.05   # Significance level
power <- 0.80   # Desired power
n_samples <- c(25, 16, 11) # Sample sizes per group (per group)

# Function to calculate minimum detectable difference
calculate_min_diff <- function(n_per_group, alpha, power) {
  # Find detectable Cohen's h
  h_detectable <- pwr.2p.test(n = n_per_group, sig.level = alpha, power = power)$h
  
  # Assume p1 ~ 0.80
  p1 <- 0.80
  asin_sqrt_p2 <- asin(sqrt(p1)) - h_detectable/2
  p2 <- (sin(asin_sqrt_p2))^2
  
  min_detectable_difference <- abs(p1 - p2)
  return(min_detectable_difference)
}

# Apply function to each sample size
results <- sapply(n_samples, calculate_min_diff, alpha = alpha, power = power)

# Show results
data.frame(
  Samples_per_group = n_samples,
  Minimum_detectable_difference = round(results, 3)
)
##   Samples_per_group Minimum_detectable_difference
## 1                25                         0.374
## 2                16                         0.470
## 3                11                         0.562
#---- Minimum detectable difference: mastitis pathogens ----

# Parameters
power <- 0.8  # Desired power
alpha <- 0.05 # Significance level
n <- 8        # Sample size per group (already per group)

# Make sure mastitis_rtu is loaded!
# sds <- c(...) will fail if mastitis_rtu doesn't exist

# Standard deviations for each pathogen
sds <- c(
  Coliforms = sd(mastitis_rtu$Coliforms, na.rm = TRUE),
  Klebsiella_sp = sd(mastitis_rtu$`Klebsiella sp.`, na.rm = TRUE),
  Streptococcus_sp = sd(mastitis_rtu$`Streptococcus sp.`, na.rm = TRUE),
  Staphylococcus_sp = sd(mastitis_rtu$`Staphylococcus sp.`, na.rm = TRUE)
)

# Loop through pathogens and calculate minimum detectable difference
for (pathogen in names(sds)) {
  sd_value <- sds[[pathogen]]
  effect_size <- pwr.t.test(n = n, d = NULL, sig.level = alpha, power = power, type = "two.sample")$d
  detectable_difference <- effect_size * sd_value
  cat(paste0("Minimum detectable difference for ", pathogen, ": ", round(detectable_difference, 2), "\n"))
}
## Minimum detectable difference for Coliforms: 2.97
## Minimum detectable difference for Klebsiella_sp: 1.79
## Minimum detectable difference for Streptococcus_sp: 2.65
## Minimum detectable difference for Staphylococcus_sp: 1.03