1 Assembly process statistics

1.1 All contigs

# ---- Results ----

# ---- Quast descriptive stats ----

table1(~`# contigs (>= 0 bp)` + N50 + L50 + `Largest contig` + `Total length (>= 0 bp)`, data= quast_wgs)
Overall
(N=84)
# contigs (>= 0 bp)
Mean (SD) 120 (333)
Median [Min, Max] 45.0 [19.0, 2590]
N50
Mean (SD) 585000 (417000)
Median [Min, Max] 390000 [20900, 1640000]
L50
Mean (SD) 3.24 (4.36)
Median [Min, Max] 3.00 [1.00, 39.0]
Largest contig
Mean (SD) 883000 (367000)
Median [Min, Max] 836000 [77900, 1640000]
Total length (>= 0 bp)
Mean (SD) 2640000 (291000)
Median [Min, Max] 2660000 [1830000, 3470000]
table1(~`# contigs (>= 0 bp)` + N50 + L50 + `Largest contig` + `Total length (>= 0 bp)`, data= quast_wgs_filtered)
Overall
(N=84)
# contigs (>= 0 bp)
Mean (SD) 48.4 (137)
Median [Min, Max] 20.0 [7.00, 1100]
N50
Mean (SD) 585000 (417000)
Median [Min, Max] 390000 [20900, 1640000]
L50
Mean (SD) 3.24 (4.36)
Median [Min, Max] 3.00 [1.00, 39.0]
Largest contig
Mean (SD) 883000 (367000)
Median [Min, Max] 836000 [77900, 1640000]
Total length (>= 0 bp)
Mean (SD) 2610000 (276000)
Median [Min, Max] 2650000 [1830000, 3230000]

1.2 Contigs >500 bp (used for analysis)

# ---- Quast descriptive stats ----

table1(~`# contigs (>= 0 bp)` + N50 + L50 + `Largest contig` + `Total length (>= 0 bp)`, data= quast_wgs_filtered)
Overall
(N=84)
# contigs (>= 0 bp)
Mean (SD) 48.4 (137)
Median [Min, Max] 20.0 [7.00, 1100]
N50
Mean (SD) 585000 (417000)
Median [Min, Max] 390000 [20900, 1640000]
L50
Mean (SD) 3.24 (4.36)
Median [Min, Max] 3.00 [1.00, 39.0]
Largest contig
Mean (SD) 883000 (367000)
Median [Min, Max] 836000 [77900, 1640000]
Total length (>= 0 bp)
Mean (SD) 2610000 (276000)
Median [Min, Max] 2650000 [1830000, 3230000]

2 AMPs genes

# Presence of AMP related genes

# Prevalence AMPs

table1(~mic_sau + mic_sub+sau_inhibition_category+sub_inhibition_category+case_control+aip_1+aip_2+aip_3+aip_4+sactipeptides+subtilosin_a+putative_bacteriocin_193+putative_bacteriocin_194|wgs_species_3,data=subset(metadata_bagel,metadata_bagel$wgs_species_3!="SAU"))
SCH
(N=21)
SCO
(N=1)
SDEV
(N=2)
SEQ
(N=1)
SHAEM
(N=19)
SPXYL
(N=5)
MSC
(N=6)
SSUC
(N=12)
SXYL
(N=13)
Overall
(N=80)
mic_sau
Mean (SD) 3.32 (0.497) 4.59 (NA) 4.29 (0.297) NA (NA) 3.97 (0.649) 3.06 (0.522) 3.33 (0.560) 2.92 (0.692) 3.05 (0.394) 3.37 (0.676)
Median [Min, Max] 3.25 [2.59, 4.51] 4.59 [4.59, 4.59] 4.29 [4.08, 4.50] NA [NA, NA] 4.14 [2.16, 4.66] 3.01 [2.61, 3.93] 3.49 [2.60, 4.03] 3.03 [1.92, 4.11] 2.90 [2.71, 4.09] 3.24 [1.92, 4.66]
Missing 2 (9.5%) 0 (0%) 0 (0%) 1 (100%) 4 (21.1%) 0 (0%) 0 (0%) 1 (8.3%) 0 (0%) 8 (10.0%)
mic_sub
Mean (SD) 3.06 (0.604) NA (NA) 3.29 (0.297) NA (NA) 3.51 (0.775) 3.06 (0.522) 3.49 (0.480) 2.92 (0.931) 3.05 (0.698) 3.13 (0.687)
Median [Min, Max] 3.00 [2.51, 5.18] NA [NA, NA] 3.29 [3.08, 3.50] NA [NA, NA] 3.71 [2.16, 4.27] 3.01 [2.61, 3.93] 3.59 [2.60, 4.03] 2.58 [1.65, 4.14] 2.93 [1.71, 4.09] 3.10 [1.65, 5.18]
Missing 2 (9.5%) 1 (100%) 0 (0%) 1 (100%) 13 (68.4%) 0 (0%) 0 (0%) 1 (8.3%) 0 (0%) 18 (22.5%)
sau_inhibition_category
11-20 2 (9.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (20.0%) 1 (16.7%) 0 (0%) 6 (46.2%) 10 (12.5%)
21-30 2 (9.5%) 0 (0%) 0 (0%) 0 (0%) 1 (5.3%) 2 (40.0%) 0 (0%) 3 (25.0%) 2 (15.4%) 10 (12.5%)
31-40 6 (28.6%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (16.7%) 0 (0%) 3 (23.1%) 10 (12.5%)
41-50 4 (19.0%) 0 (0%) 0 (0%) 0 (0%) 1 (5.3%) 0 (0%) 2 (33.3%) 2 (16.7%) 1 (7.7%) 10 (12.5%)
51-60 1 (4.8%) 0 (0%) 1 (50.0%) 0 (0%) 4 (21.1%) 1 (20.0%) 1 (16.7%) 1 (8.3%) 1 (7.7%) 10 (12.5%)
61-70 2 (9.5%) 0 (0%) 1 (50.0%) 0 (0%) 7 (36.8%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 10 (12.5%)
Top 10 2 (9.5%) 0 (0%) 0 (0%) 0 (0%) 1 (5.3%) 1 (20.0%) 1 (16.7%) 5 (41.7%) 0 (0%) 10 (12.5%)
>70 0 (0%) 1 (100%) 0 (0%) 0 (0%) 1 (5.3%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 2 (2.5%)
Undetermined MIC 0 (0%) 0 (0%) 0 (0%) 1 (100%) 4 (21.1%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 5 (6.3%)
Missing 2 (9.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (8.3%) 0 (0%) 3 (3.8%)
sub_inhibition_category
>70 1 (4.8%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (1.3%)
11-20 5 (23.8%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 2 (40.0%) 1 (16.7%) 1 (8.3%) 1 (7.7%) 10 (12.5%)
21-30 3 (14.3%) 0 (0%) 1 (50.0%) 0 (0%) 0 (0%) 2 (40.0%) 0 (0%) 0 (0%) 4 (30.8%) 10 (12.5%)
31-40 5 (23.8%) 0 (0%) 0 (0%) 0 (0%) 1 (5.3%) 0 (0%) 0 (0%) 1 (8.3%) 2 (15.4%) 9 (11.3%)
41-50 3 (14.3%) 0 (0%) 1 (50.0%) 0 (0%) 1 (5.3%) 0 (0%) 3 (50.0%) 1 (8.3%) 0 (0%) 9 (11.3%)
Top 10 2 (9.5%) 0 (0%) 0 (0%) 0 (0%) 1 (5.3%) 0 (0%) 0 (0%) 5 (41.7%) 2 (15.4%) 10 (12.5%)
Undetermined MIC 0 (0%) 1 (100%) 0 (0%) 1 (100%) 13 (68.4%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 15 (18.8%)
51-60 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (5.3%) 1 (20.0%) 2 (33.3%) 1 (8.3%) 3 (23.1%) 8 (10.0%)
61-70 0 (0%) 0 (0%) 0 (0%) 0 (0%) 2 (10.5%) 0 (0%) 0 (0%) 2 (16.7%) 1 (7.7%) 5 (6.3%)
Missing 2 (9.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (8.3%) 0 (0%) 3 (3.8%)
case_control
Control 12 (57.1%) 0 (0%) 2 (100%) 1 (100%) 10 (52.6%) 2 (40.0%) 3 (50.0%) 6 (50.0%) 6 (46.2%) 42 (52.5%)
Case 9 (42.9%) 1 (100%) 0 (0%) 0 (0%) 9 (47.4%) 3 (60.0%) 3 (50.0%) 6 (50.0%) 7 (53.8%) 38 (47.5%)
aip_1
0 5 (23.8%) 1 (100%) 2 (100%) 1 (100%) 0 (0%) 5 (100%) 3 (50.0%) 12 (100%) 13 (100%) 42 (52.5%)
1 16 (76.2%) 0 (0%) 0 (0%) 0 (0%) 19 (100%) 0 (0%) 3 (50.0%) 0 (0%) 0 (0%) 38 (47.5%)
aip_2
0 16 (76.2%) 0 (0%) 2 (100%) 1 (100%) 19 (100%) 0 (0%) 6 (100%) 0 (0%) 6 (46.2%) 50 (62.5%)
1 5 (23.8%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 5 (100%) 0 (0%) 12 (100%) 7 (53.8%) 30 (37.5%)
aip_3
0 21 (100%) 1 (100%) 2 (100%) 1 (100%) 19 (100%) 5 (100%) 6 (100%) 12 (100%) 8 (61.5%) 75 (93.8%)
1 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 5 (38.5%) 5 (6.3%)
aip_4
0 21 (100%) 1 (100%) 0 (0%) 1 (100%) 19 (100%) 5 (100%) 6 (100%) 12 (100%) 12 (92.3%) 77 (96.3%)
1 0 (0%) 0 (0%) 2 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (7.7%) 3 (3.8%)
sactipeptides
Mean (SD) 1.00 (0) 1.00 (NA) 1.00 (0) 1.00 (NA) 1.00 (0) 1.00 (0) 1.00 (0) 1.00 (0) 1.00 (0) 1.00 (0)
Median [Min, Max] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00]
subtilosin_a
1 13 (61.9%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 13 (16.3%)
0 8 (38.1%) 1 (100%) 2 (100%) 1 (100%) 19 (100%) 5 (100%) 6 (100%) 12 (100%) 13 (100%) 67 (83.8%)
putative_bacteriocin_193
0 21 (100%) 1 (100%) 2 (100%) 1 (100%) 19 (100%) 5 (100%) 2 (33.3%) 12 (100%) 13 (100%) 76 (95.0%)
1 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 4 (66.7%) 0 (0%) 0 (0%) 4 (5.0%)
putative_bacteriocin_194
0 21 (100%) 1 (100%) 2 (100%) 0 (0%) 19 (100%) 5 (100%) 6 (100%) 12 (100%) 6 (46.2%) 72 (90.0%)
1 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 7 (53.8%) 8 (10.0%)
# AIP1

table(metadata_bagel$wgs_species_2,metadata_bagel$aip_1)
##                                       
##                                         0  1
##   Other                                 4  0
##   Staphylococcus chromogenes            5 16
##   Staphylococcus haemolyticus           0 19
##   Staphylococcus sciuri                 3  3
##   Staphylococcus succinus              12  0
##   Staphylococcus xylosus/pseudoxylosus 18  0
fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$aip_1))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$aip_1)
## p-value = 1.063e-15
## alternative hypothesis: two.sided
# AIP2

table(metadata_bagel$wgs_species_2,metadata_bagel$aip_2)
##                                       
##                                         0  1
##   Other                                 3  1
##   Staphylococcus chromogenes           16  5
##   Staphylococcus haemolyticus          19  0
##   Staphylococcus sciuri                 6  0
##   Staphylococcus succinus               0 12
##   Staphylococcus xylosus/pseudoxylosus  6 12
fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$aip_2))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$aip_2)
## p-value = 2.758e-10
## alternative hypothesis: two.sided
# AIP3

table(metadata_bagel$wgs_species_2,metadata_bagel$aip_3)
##                                       
##                                         0  1
##   Other                                 4  0
##   Staphylococcus chromogenes           21  0
##   Staphylococcus haemolyticus          19  0
##   Staphylococcus sciuri                 6  0
##   Staphylococcus succinus              12  0
##   Staphylococcus xylosus/pseudoxylosus 13  5
fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$aip_3))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$aip_3)
## p-value = 0.007501
## alternative hypothesis: two.sided
# AIP4

table(metadata_bagel$wgs_species_2,metadata_bagel$aip_4)
##                                       
##                                         0  1
##   Other                                 2  2
##   Staphylococcus chromogenes           21  0
##   Staphylococcus haemolyticus          19  0
##   Staphylococcus sciuri                 6  0
##   Staphylococcus succinus              12  0
##   Staphylococcus xylosus/pseudoxylosus 17  1
fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$aip_4))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$aip_4)
## p-value = 0.003651
## alternative hypothesis: two.sided
# Subtilosin A

table(metadata_bagel$wgs_species_2,metadata_bagel$subtilosin_a)
##                                       
##                                         1  0
##   Other                                 0  4
##   Staphylococcus chromogenes           13  8
##   Staphylococcus haemolyticus           0 19
##   Staphylococcus sciuri                 0  6
##   Staphylococcus succinus               0 12
##   Staphylococcus xylosus/pseudoxylosus  0 18
fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$subtilosin_a))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$subtilosin_a)
## p-value = 4.126e-08
## alternative hypothesis: two.sided
# Putative bacteriocin 193

table(metadata_bagel$wgs_species_2,metadata_bagel$putative_bacteriocin_193)
##                                       
##                                         0  1
##   Other                                 4  0
##   Staphylococcus chromogenes           21  0
##   Staphylococcus haemolyticus          19  0
##   Staphylococcus sciuri                 2  4
##   Staphylococcus succinus              12  0
##   Staphylococcus xylosus/pseudoxylosus 18  0
fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$putative_bacteriocin_193))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$putative_bacteriocin_193)
## p-value = 1.012e-05
## alternative hypothesis: two.sided
# Putative bacteriocin 194

table(metadata_bagel$wgs_species_2,metadata_bagel$putative_bacteriocin_194)
##                                       
##                                         0  1
##   Other                                 3  1
##   Staphylococcus chromogenes           21  0
##   Staphylococcus haemolyticus          19  0
##   Staphylococcus sciuri                 6  0
##   Staphylococcus succinus              12  0
##   Staphylococcus xylosus/pseudoxylosus 11  7
fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$putative_bacteriocin_194))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$putative_bacteriocin_194)
## p-value = 0.0002158
## alternative hypothesis: two.sided

2.1 SCH

# --- Inhibitory activity SCH ----

# AIP1

table1(~aip_1|case_control,data=mb_chromogenes)
Control
(N=12)
Case
(N=9)
Overall
(N=21)
aip_1
0 1 (8.3%) 4 (44.4%) 5 (23.8%)
1 11 (91.7%) 5 (55.6%) 16 (76.2%)
model_1 <- glm(aip_1 ~ case_control,family=binomial(link="logit"),data=mb_chromogenes)
summary(model_1)
## 
## Call:
## glm(formula = aip_1 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_chromogenes)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)  
## (Intercept)         2.398      1.044   2.296   0.0217 *
## case_controlCase   -2.175      1.241  -1.752   0.0798 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 23.053  on 20  degrees of freedom
## Residual deviance: 19.249  on 19  degrees of freedom
## AIC: 23.249
## 
## Number of Fisher Scoring iterations: 5
exp(Confint(model_1))
##                    Estimate       2.5 %     97.5 %
## (Intercept)      11.0000000 2.140159987 201.083018
## case_controlCase  0.1136364 0.005045515   1.010237
effectsize::oddsratio_to_riskratio(model_1)
## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.92) - make sure your intercept
##   is meaningful.
## CIs are back-transformed from the logit scale.
## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.92 |             
## case control [Case] |       0.61 | [0.06, 1.00]
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
emmeans(model_1,~case_control,type="response")
##  case_control  prob     SE  df asymp.LCL asymp.UCL
##  Control      0.917 0.0798 Inf     0.587     0.988
##  Case         0.556 0.1656 Inf     0.251     0.823
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
# AIP2

table1(~aip_2|case_control,data=mb_chromogenes)
Control
(N=12)
Case
(N=9)
Overall
(N=21)
aip_2
0 11 (91.7%) 5 (55.6%) 16 (76.2%)
1 1 (8.3%) 4 (44.4%) 5 (23.8%)
model_2 <- glm(aip_2 ~ case_control,family=binomial(link="logit"),data=mb_chromogenes)
summary(model_2)
## 
## Call:
## glm(formula = aip_2 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_chromogenes)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)  
## (Intercept)        -2.398      1.044  -2.296   0.0217 *
## case_controlCase    2.175      1.241   1.752   0.0798 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 23.053  on 20  degrees of freedom
## Residual deviance: 19.249  on 19  degrees of freedom
## AIC: 23.249
## 
## Number of Fisher Scoring iterations: 5
exp(Confint(model_2))
##                    Estimate      2.5 %      97.5 %
## (Intercept)      0.09090909 0.00497307   0.4672548
## case_controlCase 8.80000000 0.98986680 198.1958191
effectsize::oddsratio_to_riskratio(model_2)
## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.08) - make sure your intercept
##   is meaningful.
## CIs are back-transformed from the logit scale.
## Parameter           | Risk Ratio |        95% CI
## ------------------------------------------------
## (Intercept)         |       0.08 |              
## case control [Case] |       5.33 | [0.99, 11.37]
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
emmeans(model_2,~case_control,type="response")
##  case_control   prob     SE  df asymp.LCL asymp.UCL
##  Control      0.0833 0.0798 Inf    0.0116     0.413
##  Case         0.4444 0.1656 Inf    0.1768     0.749
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
# Subtilosin A

table1(~subtilosin_a|case_control,data=mb_chromogenes)
Control
(N=12)
Case
(N=9)
Overall
(N=21)
subtilosin_a
1 7 (58.3%) 6 (66.7%) 13 (61.9%)
0 5 (41.7%) 3 (33.3%) 8 (38.1%)
model_3 <- glm(subtilosin_a ~ case_control,family=binomial(link="logit"),data=mb_chromogenes)
summary(model_3)
## 
## Call:
## glm(formula = subtilosin_a ~ case_control, family = binomial(link = "logit"), 
##     data = mb_chromogenes)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)       -0.3365     0.5855  -0.575    0.566
## case_controlCase  -0.3567     0.9181  -0.389    0.698
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 27.910  on 20  degrees of freedom
## Residual deviance: 27.758  on 19  degrees of freedom
## AIC: 31.758
## 
## Number of Fisher Scoring iterations: 4
exp(Confint(model_3))
##                   Estimate     2.5 %   97.5 %
## (Intercept)      0.7142857 0.2113989 2.237832
## case_controlCase 0.7000000 0.1053801 4.183982
effectsize::oddsratio_to_riskratio(model_3)
## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.42) - make sure your intercept
##   is meaningful.
## CIs are back-transformed from the logit scale.
## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.42 |             
## case control [Case] |       0.80 | [0.17, 1.80]
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
emmeans(model_3,~case_control,type="response")
##  case_control  prob    SE  df asymp.LCL asymp.UCL
##  Control      0.417 0.142 Inf     0.185     0.692
##  Case         0.333 0.157 Inf     0.111     0.667
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
# Association between AMPs and MIC in SCH

# AIP1

model_1 <- lm(mic_sau ~ aip_1,data=mb_chromogenes)
summary(model_1)
## 
## Call:
## lm(formula = mic_sau ~ aip_1, data = mb_chromogenes)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7647 -0.3097 -0.0300  0.2353  1.1553 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.1900     0.2533  12.594 4.79e-10 ***
## aip_11        0.1647     0.2851   0.578    0.571    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5066 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01925,    Adjusted R-squared:  -0.03844 
## F-statistic: 0.3337 on 1 and 17 DF,  p-value: 0.5711
model_1 <- lm(mic_sub ~ aip_1,data=mb_chromogenes)
summary(model_1)
## 
## Call:
## lm(formula = mic_sub ~ aip_1, data = mb_chromogenes)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.578 -0.383 -0.088  0.206  2.092 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.9400     0.3093   9.507 3.23e-08 ***
## aip_11        0.1480     0.3481   0.425    0.676    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6185 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01052,    Adjusted R-squared:  -0.04768 
## F-statistic: 0.1808 on 1 and 17 DF,  p-value: 0.676
# AIP2

model_2 <- lm(mic_sau ~ aip_2,data=mb_chromogenes)
summary(model_2)
## 
## Call:
## lm(formula = mic_sau ~ aip_2, data = mb_chromogenes)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7647 -0.3097 -0.0300  0.2353  1.1553 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.3547     0.1308  25.647 4.97e-15 ***
## aip_21       -0.1647     0.2851  -0.578    0.571    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5066 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01925,    Adjusted R-squared:  -0.03844 
## F-statistic: 0.3337 on 1 and 17 DF,  p-value: 0.5711
model_2 <- lm(mic_sub ~ aip_2,data=mb_chromogenes)
summary(model_2)
## 
## Call:
## lm(formula = mic_sub ~ aip_2, data = mb_chromogenes)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.578 -0.383 -0.088  0.206  2.092 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.0880     0.1597  19.336 5.19e-13 ***
## aip_21       -0.1480     0.3481  -0.425    0.676    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6185 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01052,    Adjusted R-squared:  -0.04768 
## F-statistic: 0.1808 on 1 and 17 DF,  p-value: 0.676
# Subtilosin A

model_3 <- lm(mic_sau ~ subtilosin_a,data=mb_chromogenes)
summary(model_3)
## 
## Call:
## lm(formula = mic_sau ~ subtilosin_a, data = mb_chromogenes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84273 -0.33886  0.07727  0.20614  1.07727 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     3.4327     0.1484  23.137 2.73e-14 ***
## subtilosin_a0  -0.2677     0.2286  -1.171    0.258    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4921 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.07463,    Adjusted R-squared:  0.0202 
## F-statistic: 1.371 on 1 and 17 DF,  p-value: 0.2578
model_3 <- lm(mic_sub ~ subtilosin_a,data=mb_chromogenes)
summary(model_3)
## 
## Call:
## lm(formula = mic_sub ~ subtilosin_a, data = mb_chromogenes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55909 -0.41455 -0.06909  0.18545  2.11091 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.06909    0.18742  16.375 7.64e-12 ***
## subtilosin_a0 -0.02909    0.28884  -0.101    0.921    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6216 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.0005964,  Adjusted R-squared:  -0.05819 
## F-statistic: 0.01014 on 1 and 17 DF,  p-value: 0.921
# Clades within this group


# List of target isolates

target_isolates <- c("SCH1", "SCH3", "SCH5", "SCH8", "SCH15", "SCH16", "SCH17", "SCH18", "SCH20")

# Create a new column 'presence' based on target isolates
mb_chromogenes <- mb_chromogenes %>%
  mutate(clade = ifelse(wgs_isolate_id %in% target_isolates, "Present", "Absent"))


# List of target isolates
target_isolates_2 <- c("SCH2", "SCH11", "SCH21", "SCH9")

# Create a new column 'presence' based on target isolates
mb_chromogenes <- mb_chromogenes %>%
  mutate(clade_2 = ifelse(wgs_isolate_id %in% target_isolates_2, "Present", "Absent"))


table1(~mic_sau + mic_sub+sau_inhibition_category+sub_inhibition_category+subtilosin_a|clade,data=subset(mb_chromogenes,mb_chromogenes$wgs_species_3!="SAU"))
Absent
(N=12)
Present
(N=9)
Overall
(N=21)
mic_sau
Mean (SD) 3.30 (0.409) 3.35 (0.629) 3.32 (0.497)
Median [Min, Max] 3.25 [2.74, 4.18] 3.37 [2.59, 4.51] 3.25 [2.59, 4.51]
Missing 1 (8.3%) 1 (11.1%) 2 (9.5%)
mic_sub
Mean (SD) 3.21 (0.712) 2.85 (0.362) 3.06 (0.604)
Median [Min, Max] 3.16 [2.61, 5.18] 2.71 [2.51, 3.51] 3.00 [2.51, 5.18]
Missing 1 (8.3%) 1 (11.1%) 2 (9.5%)
sau_inhibition_category
11 to 20 2 (16.7%) 0 (0%) 2 (9.5%)
21 to 30 1 (8.3%) 1 (11.1%) 2 (9.5%)
31 to 40 5 (41.7%) 1 (11.1%) 6 (28.6%)
41 to 50 2 (16.7%) 2 (22.2%) 4 (19.0%)
61 to 70 1 (8.3%) 1 (11.1%) 2 (9.5%)
51 to 60 0 (0%) 1 (11.1%) 1 (4.8%)
Top 10 0 (0%) 2 (22.2%) 2 (9.5%)
Missing 1 (8.3%) 1 (11.1%) 2 (9.5%)
sub_inhibition_category
>70 1 (8.3%) 0 (0%) 1 (4.8%)
11 to 20 3 (25.0%) 2 (22.2%) 5 (23.8%)
21 to 30 1 (8.3%) 2 (22.2%) 3 (14.3%)
31 to 40 4 (33.3%) 1 (11.1%) 5 (23.8%)
41 to 50 2 (16.7%) 1 (11.1%) 3 (14.3%)
Top 10 0 (0%) 2 (22.2%) 2 (9.5%)
Missing 1 (8.3%) 1 (11.1%) 2 (9.5%)
subtilosin_a
1 4 (33.3%) 9 (100%) 13 (61.9%)
0 8 (66.7%) 0 (0%) 8 (38.1%)
# Isolates inside and outside clade

model <- lm(mic_sau~clade,data=mb_chromogenes)
summary(model)
## 
## Call:
## lm(formula = mic_sau ~ clade, data = mb_chromogenes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76125 -0.27926 -0.04727  0.26074  1.15875 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.29727    0.15400  21.411  9.8e-14 ***
## cladePresent  0.05398    0.23733   0.227    0.823    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5108 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.003034,   Adjusted R-squared:  -0.05561 
## F-statistic: 0.05173 on 1 and 17 DF,  p-value: 0.8228
Confint(model)
##                Estimate      2.5 %    97.5 %
## (Intercept)  3.29727273  2.9723606 3.6221848
## cladePresent 0.05397727 -0.4467459 0.5547004
emmeans(model,~clade)
##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.30 0.154 17     2.97     3.62
##  Present   3.35 0.181 17     2.97     3.73
## 
## Confidence level used: 0.95
model <- lm(mic_sub~clade,data=mb_chromogenes)
summary(model)
## 
## Call:
## lm(formula = mic_sub ~ clade, data = mb_chromogenes)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5964 -0.3162 -0.1013  0.1112  1.9736 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.2064     0.1790  17.917 1.79e-12 ***
## cladePresent  -0.3551     0.2758  -1.288    0.215    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5935 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.08886,    Adjusted R-squared:  0.03527 
## F-statistic: 1.658 on 1 and 17 DF,  p-value: 0.2151
Confint(model)
##                Estimate      2.5 %    97.5 %
## (Intercept)   3.2063636  2.8288051 3.5839221
## cladePresent -0.3551136 -0.9369704 0.2267431
emmeans(model,~clade)
##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.21 0.179 17     2.83     3.58
##  Present   2.85 0.210 17     2.41     3.29
## 
## Confidence level used: 0.95
# AIP1

table1(~aip_1|clade,data=mb_chromogenes)
Absent
(N=12)
Present
(N=9)
Overall
(N=21)
aip_1
0 5 (41.7%) 0 (0%) 5 (23.8%)
1 7 (58.3%) 9 (100%) 16 (76.2%)
fisher.test(table(mb_chromogenes$aip_1,mb_chromogenes$clade))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$aip_1, mb_chromogenes$clade)
## p-value = 0.04511
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.8206881       Inf
## sample estimates:
## odds ratio 
##        Inf
# AIP2

table1(~aip_2|clade,data=mb_chromogenes)
Absent
(N=12)
Present
(N=9)
Overall
(N=21)
aip_2
0 7 (58.3%) 9 (100%) 16 (76.2%)
1 5 (41.7%) 0 (0%) 5 (23.8%)
fisher.test(table(mb_chromogenes$aip_2,mb_chromogenes$clade))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$aip_2, mb_chromogenes$clade)
## p-value = 0.04511
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.00000 1.21849
## sample estimates:
## odds ratio 
##          0
# Subtilosin A

table1(~subtilosin_a|clade,data=mb_chromogenes)
Absent
(N=12)
Present
(N=9)
Overall
(N=21)
subtilosin_a
1 4 (33.3%) 9 (100%) 13 (61.9%)
0 8 (66.7%) 0 (0%) 8 (38.1%)
fisher.test(table(mb_chromogenes$subtilosin_a,mb_chromogenes$clade))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$subtilosin_a, mb_chromogenes$clade)
## p-value = 0.0046
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.0000000 0.4517167
## sample estimates:
## odds ratio 
##          0
# Isolates inside and outside clade 2

model <- lm(mic_sau~clade_2,data=mb_chromogenes)
summary(model)
## 
## Call:
## lm(formula = mic_sau ~ clade_2, data = mb_chromogenes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.75813 -0.30313 -0.06813  0.24188  1.16187 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      3.3481     0.1267  26.420 3.03e-15 ***
## clade_2Present  -0.1781     0.3189  -0.559    0.584    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5069 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01802,    Adjusted R-squared:  -0.03974 
## F-statistic: 0.3119 on 1 and 17 DF,  p-value: 0.5838
Confint(model)
##                 Estimate      2.5 %    97.5 %
## (Intercept)     3.348125  3.0807545 3.6154955
## clade_2Present -0.178125 -0.8509926 0.4947426
emmeans(model,~clade_2)
##  clade_2 emmean    SE df lower.CL upper.CL
##  Absent    3.35 0.127 17     3.08     3.62
##  Present   3.17 0.293 17     2.55     3.79
## 
## Confidence level used: 0.95
model <- lm(mic_sub~clade_2,data=mb_chromogenes)
summary(model)
## 
## Call:
## lm(formula = mic_sub ~ clade_2, data = mb_chromogenes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.58813 -0.39312 -0.09667  0.16687  2.08187 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      3.0981     0.1534  20.197 2.55e-13 ***
## clade_2Present  -0.2615     0.3860  -0.677    0.507    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6136 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.02628,    Adjusted R-squared:  -0.031 
## F-statistic: 0.4587 on 1 and 17 DF,  p-value: 0.5073
Confint(model)
##                  Estimate     2.5 %    97.5 %
## (Intercept)     3.0981250  2.774496 3.4217537
## clade_2Present -0.2614583 -1.075906 0.5529893
emmeans(model,~clade_2)
##  clade_2 emmean    SE df lower.CL upper.CL
##  Absent    3.10 0.153 17     2.77     3.42
##  Present   2.84 0.354 17     2.09     3.58
## 
## Confidence level used: 0.95
# AIP1

table1(~aip_1|clade_2,data=mb_chromogenes)
Absent
(N=17)
Present
(N=4)
Overall
(N=21)
aip_1
0 1 (5.9%) 4 (100%) 5 (23.8%)
1 16 (94.1%) 0 (0%) 16 (76.2%)
fisher.test(table(mb_chromogenes$aip_1,mb_chromogenes$clade_2))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$aip_1, mb_chromogenes$clade_2)
## p-value = 0.0008354
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.0000000 0.2697048
## sample estimates:
## odds ratio 
##          0
# AIP2

table1(~aip_2|clade_2,data=mb_chromogenes)
Absent
(N=17)
Present
(N=4)
Overall
(N=21)
aip_2
0 16 (94.1%) 0 (0%) 16 (76.2%)
1 1 (5.9%) 4 (100%) 5 (23.8%)
fisher.test(table(mb_chromogenes$aip_2,mb_chromogenes$clade_2))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$aip_2, mb_chromogenes$clade_2)
## p-value = 0.0008354
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  3.707758      Inf
## sample estimates:
## odds ratio 
##        Inf
# Subtilosin A

table1(~subtilosin_a|clade_2,data=mb_chromogenes)
Absent
(N=17)
Present
(N=4)
Overall
(N=21)
subtilosin_a
1 10 (58.8%) 3 (75.0%) 13 (61.9%)
0 7 (41.2%) 1 (25.0%) 8 (38.1%)
fisher.test(table(mb_chromogenes$subtilosin_a,mb_chromogenes$clade_2))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$subtilosin_a, mb_chromogenes$clade_2)
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.007958673 7.734743347
## sample estimates:
## odds ratio 
##  0.4921903

2.2 SHAEM

# --- AMPs in SHAEM----

# none of the AMPs were both present and absent in at least 2 isolates

# Inhibitory activity was absent in most isolates

2.3 SSC

# ---- AMPs in SSC ----

# AIP1

table1(~aip_1|case_control,data=mb_sciuri)
Control
(N=3)
Case
(N=3)
Overall
(N=6)
aip_1
0 1 (33.3%) 2 (66.7%) 3 (50.0%)
1 2 (66.7%) 1 (33.3%) 3 (50.0%)
model_1 <- glm(aip_1 ~ case_control,family=binomial(link="logit"),data=mb_sciuri)
summary(model_1)
## 
## Call:
## glm(formula = aip_1 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_sciuri)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)        0.6931     1.2247   0.566    0.571
## case_controlCase  -1.3863     1.7321  -0.800    0.423
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 8.3178  on 5  degrees of freedom
## Residual deviance: 7.6382  on 4  degrees of freedom
## AIC: 11.638
## 
## Number of Fisher Scoring iterations: 4
exp(Confint(model_1))
##                  Estimate       2.5 %    97.5 %
## (Intercept)          2.00 0.191564939 43.037669
## case_controlCase     0.25 0.004803826  6.562236
effectsize::oddsratio_to_riskratio(model_1)
## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.67) - make sure your intercept
##   is meaningful.
## CIs are back-transformed from the logit scale.
## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.67 |             
## case control [Case] |       0.50 | [0.01, 1.39]
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
emmeans(model_1,~case_control,type="response")
##  case_control  prob    SE  df asymp.LCL asymp.UCL
##  Control      0.667 0.272 Inf    0.1535     0.957
##  Case         0.333 0.272 Inf    0.0434     0.846
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
# Putative bacteriocin 193

table1(~putative_bacteriocin_193|case_control,data=mb_sciuri)
Control
(N=3)
Case
(N=3)
Overall
(N=6)
putative_bacteriocin_193
0 1 (33.3%) 1 (33.3%) 2 (33.3%)
1 2 (66.7%) 2 (66.7%) 4 (66.7%)
model_2 <- glm(putative_bacteriocin_193 ~ case_control,family=binomial(link="logit"),data=mb_sciuri)
summary(model_2)
## 
## Call:
## glm(formula = putative_bacteriocin_193 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_sciuri)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)
## (Intercept)      6.931e-01  1.225e+00   0.566    0.571
## case_controlCase 1.923e-16  1.732e+00   0.000    1.000
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 7.6382  on 5  degrees of freedom
## Residual deviance: 7.6382  on 4  degrees of freedom
## AIC: 11.638
## 
## Number of Fisher Scoring iterations: 4
exp(Confint(model_2))
##                  Estimate      2.5 %   97.5 %
## (Intercept)             2 0.19156494 43.03767
## case_controlCase        1 0.02475536 40.39530
effectsize::oddsratio_to_riskratio(model_2)
## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.67) - make sure your intercept
##   is meaningful.
## CIs are back-transformed from the logit scale.
## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.67 |             
## case control [Case] |       1.00 | [0.07, 1.48]
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
emmeans(model_2,~case_control,type="response")
##  case_control  prob    SE  df asymp.LCL asymp.UCL
##  Control      0.667 0.272 Inf     0.154     0.957
##  Case         0.667 0.272 Inf     0.154     0.957
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
# Association between AMPs and MIC in SSCI

# AIP1

model_1 <- lm(mic_sau ~ aip_1,data=mb_sciuri)
summary(model_1)
## 
## Call:
## lm(formula = mic_sau ~ aip_1, data = mb_sciuri)
## 
## Residuals:
##     1     2     3     4     5     6 
## -0.09  0.10  0.92 -0.41 -0.01 -0.51 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.5400     0.3282  10.785 0.000419 ***
## aip_11       -0.4300     0.4642  -0.926 0.406692    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5685 on 4 degrees of freedom
## Multiple R-squared:  0.1766, Adjusted R-squared:  -0.0292 
## F-statistic: 0.8581 on 1 and 4 DF,  p-value: 0.4067
model_1 <- lm(mic_sub ~ aip_1,data=mb_sciuri)
summary(model_1)
## 
## Call:
## lm(formula = mic_sub ~ aip_1, data = mb_sciuri)
## 
## Residuals:
##       1       2       3       4       5       6 
## -0.0900  0.1000  0.5867  0.2567 -0.0100 -0.8433 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.54000    0.30815  11.488 0.000328 ***
## aip_11      -0.09667    0.43579  -0.222 0.835318    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5337 on 4 degrees of freedom
## Multiple R-squared:  0.01215,    Adjusted R-squared:  -0.2348 
## F-statistic: 0.0492 on 1 and 4 DF,  p-value: 0.8353
# putative bacteriocin 193

model_2 <- lm(mic_sau ~ putative_bacteriocin_193,data=mb_sciuri)
summary(model_2)
## 
## Call:
## lm(formula = mic_sau ~ putative_bacteriocin_193, data = mb_sciuri)
## 
## Residuals:
##      1      2      3      4      5      6 
##  0.145  0.335  0.665 -0.665  0.225 -0.705 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                 3.3650     0.4423   7.607   0.0016 **
## putative_bacteriocin_1931  -0.0600     0.5418  -0.111   0.9171   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6256 on 4 degrees of freedom
## Multiple R-squared:  0.003057,   Adjusted R-squared:  -0.2462 
## F-statistic: 0.01227 on 1 and 4 DF,  p-value: 0.9171
model_2 <- lm(mic_sub ~ putative_bacteriocin_193,data=mb_sciuri)
summary(model_2)
## 
## Call:
## lm(formula = mic_sub ~ putative_bacteriocin_193, data = mb_sciuri)
## 
## Residuals:
##      1      2      3      4      5      6 
##  0.145  0.335  0.165 -0.165  0.225 -0.705 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 3.8650     0.3032  12.748 0.000218 ***
## putative_bacteriocin_1931  -0.5600     0.3713  -1.508 0.206010    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4288 on 4 degrees of freedom
## Multiple R-squared:  0.3625, Adjusted R-squared:  0.2031 
## F-statistic: 2.274 on 1 and 4 DF,  p-value: 0.206
# clades

# List of target isolates
target_isolates <- c("SSCI1", "SSCI2", "SSCI5", "SSCI6")


# Create a new column 'presence' based on target isolates
mb_sciuri <- mb_sciuri %>%
  mutate(clade = ifelse(wgs_isolate_id %in% target_isolates, "Present", "Absent"))


table1(~mic_sau + mic_sub+sau_inhibition_category+sub_inhibition_category+case_control+aip_1+aip_2+aip_3+aip_4+sactipeptides+subtilosin_a+putative_bacteriocin_193+putative_bacteriocin_194|clade,data=mb_sciuri)
Absent
(N=2)
Present
(N=4)
Overall
(N=6)
mic_sau
Mean (SD) 3.37 (0.940) 3.31 (0.476) 3.33 (0.560)
Median [Min, Max] 3.37 [2.70, 4.03] 3.49 [2.60, 3.64] 3.49 [2.60, 4.03]
mic_sub
Mean (SD) 3.87 (0.233) 3.31 (0.476) 3.49 (0.480)
Median [Min, Max] 3.87 [3.70, 4.03] 3.49 [2.60, 3.64] 3.59 [2.60, 4.03]
sau_inhibition_category
11 to 20 1 (50.0%) 0 (0%) 1 (16.7%)
51 to 60 1 (50.0%) 0 (0%) 1 (16.7%)
31 to 40 0 (0%) 1 (25.0%) 1 (16.7%)
41 to 50 0 (0%) 2 (50.0%) 2 (33.3%)
Top 10 0 (0%) 1 (25.0%) 1 (16.7%)
sub_inhibition_category
51 to 60 2 (100%) 0 (0%) 2 (33.3%)
11 to 20 0 (0%) 1 (25.0%) 1 (16.7%)
41 to 50 0 (0%) 3 (75.0%) 3 (50.0%)
case_control
Control 1 (50.0%) 2 (50.0%) 3 (50.0%)
Case 1 (50.0%) 2 (50.0%) 3 (50.0%)
aip_1
0 0 (0%) 3 (75.0%) 3 (50.0%)
1 2 (100%) 1 (25.0%) 3 (50.0%)
aip_2
0 2 (100%) 4 (100%) 6 (100%)
1 0 (0%) 0 (0%) 0 (0%)
aip_3
0 2 (100%) 4 (100%) 6 (100%)
1 0 (0%) 0 (0%) 0 (0%)
aip_4
0 2 (100%) 4 (100%) 6 (100%)
1 0 (0%) 0 (0%) 0 (0%)
sactipeptides
Mean (SD) 1.00 (0) 1.00 (0) 1.00 (0)
Median [Min, Max] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00]
subtilosin_a
1 0 (0%) 0 (0%) 0 (0%)
0 2 (100%) 4 (100%) 6 (100%)
putative_bacteriocin_193
0 2 (100%) 0 (0%) 2 (33.3%)
1 0 (0%) 4 (100%) 4 (66.7%)
putative_bacteriocin_194
0 2 (100%) 4 (100%) 6 (100%)
1 0 (0%) 0 (0%) 0 (0%)
# MIC across different clades

model <- lm(mic_sau~clade,data=mb_sciuri)
summary(model)
## 
## Call:
## lm(formula = mic_sau ~ clade, data = mb_sciuri)
## 
## Residuals:
##      1      2      3      4      5      6 
##  0.145  0.335  0.665 -0.665  0.225 -0.705 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    3.3650     0.4423   7.607   0.0016 **
## cladePresent  -0.0600     0.5418  -0.111   0.9171   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6256 on 4 degrees of freedom
## Multiple R-squared:  0.003057,   Adjusted R-squared:  -0.2462 
## F-statistic: 0.01227 on 1 and 4 DF,  p-value: 0.9171
Confint(model)
##              Estimate     2.5 %   97.5 %
## (Intercept)     3.365  2.136854 4.593146
## cladePresent   -0.060 -1.564165 1.444165
emmeans(model,~clade)
##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.37 0.442  4     2.14     4.59
##  Present   3.31 0.313  4     2.44     4.17
## 
## Confidence level used: 0.95
model <- lm(mic_sub~clade,data=mb_sciuri)
summary(model)
## 
## Call:
## lm(formula = mic_sub ~ clade, data = mb_sciuri)
## 
## Residuals:
##      1      2      3      4      5      6 
##  0.145  0.335  0.165 -0.165  0.225 -0.705 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.8650     0.3032  12.748 0.000218 ***
## cladePresent  -0.5600     0.3713  -1.508 0.206010    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4288 on 4 degrees of freedom
## Multiple R-squared:  0.3625, Adjusted R-squared:  0.2031 
## F-statistic: 2.274 on 1 and 4 DF,  p-value: 0.206
Confint(model)
##              Estimate     2.5 %   97.5 %
## (Intercept)     3.865  3.023234 4.706766
## cladePresent   -0.560 -1.590948 0.470948
emmeans(model,~clade)
##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.87 0.303  4     3.02     4.71
##  Present   3.31 0.214  4     2.71     3.90
## 
## Confidence level used: 0.95
# AIP1

table1(~aip_1|clade,data=mb_sciuri)
Absent
(N=2)
Present
(N=4)
Overall
(N=6)
aip_1
0 0 (0%) 3 (75.0%) 3 (50.0%)
1 2 (100%) 1 (25.0%) 3 (50.0%)
fisher.test(table(mb_sciuri$aip_1,mb_sciuri$clade))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_sciuri$aip_1, mb_sciuri$clade)
## p-value = 0.4
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.000000 4.922984
## sample estimates:
## odds ratio 
##          0
# putative bacteriocin 193

table1(~putative_bacteriocin_193|clade,data=mb_sciuri)
Absent
(N=2)
Present
(N=4)
Overall
(N=6)
putative_bacteriocin_193
0 2 (100%) 0 (0%) 2 (33.3%)
1 0 (0%) 4 (100%) 4 (66.7%)
fisher.test(table(mb_sciuri$putative_bacteriocin_193,mb_sciuri$clade))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_sciuri$putative_bacteriocin_193, mb_sciuri$clade)
## p-value = 0.06667
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.5079839       Inf
## sample estimates:
## odds ratio 
##        Inf

2.4 SSUC

# ---- AMPs in SSUC ----

# none of the AMPs were both present and absent in at least 2 isolates

# Clades

# List of target isolates
target_isolates <- c("SSUC1", "SSUC2", "SSUC3", "SSUC4", "SSUC5", "SSUC10", "SSUC12")


# Create a new column 'presence' based on target isolates
mb_succinus <- mb_succinus %>%
  mutate(clade = ifelse(wgs_isolate_id %in% target_isolates, "Present", "Absent"))


table1(~mic_sau + mic_sub+sau_inhibition_category+sub_inhibition_category+subtilosin_a|clade,data=subset(mb_succinus,mb_succinus$wgs_species_3!="SAU"))
Absent
(N=5)
Present
(N=7)
Overall
(N=12)
mic_sau
Mean (SD) 3.50 (0.443) 2.44 (0.430) 2.92 (0.692)
Median [Min, Max] 3.58 [3.03, 4.11] 2.40 [1.92, 3.14] 3.03 [1.92, 4.11]
Missing 0 (0%) 1 (14.3%) 1 (8.3%)
mic_sub
Mean (SD) 3.50 (0.649) 2.44 (0.884) 2.92 (0.931)
Median [Min, Max] 3.68 [2.58, 4.11] 2.21 [1.65, 4.14] 2.58 [1.65, 4.14]
Missing 0 (0%) 1 (14.3%) 1 (8.3%)
sau_inhibition_category
21 to 30 2 (40.0%) 1 (14.3%) 3 (25.0%)
41 to 50 2 (40.0%) 0 (0%) 2 (16.7%)
51 to 60 1 (20.0%) 0 (0%) 1 (8.3%)
Top 10 0 (0%) 5 (71.4%) 5 (41.7%)
Missing 0 (0%) 1 (14.3%) 1 (8.3%)
sub_inhibition_category
11 to 20 1 (20.0%) 0 (0%) 1 (8.3%)
31 to 40 1 (20.0%) 0 (0%) 1 (8.3%)
41 to 50 1 (20.0%) 0 (0%) 1 (8.3%)
51 to 60 1 (20.0%) 0 (0%) 1 (8.3%)
61 to 70 1 (20.0%) 1 (14.3%) 2 (16.7%)
Top 10 0 (0%) 5 (71.4%) 5 (41.7%)
Missing 0 (0%) 1 (14.3%) 1 (8.3%)
subtilosin_a
1 0 (0%) 0 (0%) 0 (0%)
0 5 (100%) 7 (100%) 12 (100%)
# MIC across different clades

model <- lm(mic_sau~clade,data=mb_succinus)
summary(model)
## 
## Call:
## lm(formula = mic_sau ~ clade, data = mb_succinus)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.520 -0.346  0.070  0.194  0.700 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.5020     0.1949  17.971 2.33e-08 ***
## cladePresent  -1.0620     0.2639  -4.025    0.003 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4357 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6429, Adjusted R-squared:  0.6032 
## F-statistic:  16.2 on 1 and 9 DF,  p-value: 0.002996
Confint(model)
##              Estimate     2.5 %     97.5 %
## (Intercept)     3.502  3.061169  3.9428312
## cladePresent   -1.062 -1.658888 -0.4651118
emmeans(model,~clade)
##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.50 0.195  9     3.06     3.94
##  Present   2.44 0.178  9     2.04     2.84
## 
## Confidence level used: 0.95
model <- lm(mic_sub~clade,data=mb_succinus)
summary(model)
## 
## Call:
## lm(formula = mic_sub ~ clade, data = mb_succinus)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.922 -0.456 -0.160  0.353  1.700 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.5020     0.3524   9.937 3.77e-06 ***
## cladePresent  -1.0620     0.4772  -2.226   0.0531 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.788 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.355,  Adjusted R-squared:  0.2833 
## F-statistic: 4.953 on 1 and 9 DF,  p-value: 0.05308
Confint(model)
##              Estimate     2.5 %     97.5 %
## (Intercept)     3.502  2.704779 4.29922133
## cladePresent   -1.062 -2.141443 0.01744278
emmeans(model,~clade)
##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.50 0.352  9     2.70     4.30
##  Present   2.44 0.322  9     1.71     3.17
## 
## Confidence level used: 0.95

2.5 SXYL & SPXYL

# ---- AMPs in SXYL/PXYL ----

# AIP2

table1(~aip_2|case_control,data=mb_xylosus)
Control
(N=8)
Case
(N=10)
Overall
(N=18)
aip_2
0 2 (25.0%) 4 (40.0%) 6 (33.3%)
1 6 (75.0%) 6 (60.0%) 12 (66.7%)
model_1 <- glm(aip_2 ~ case_control,family=binomial(link="logit"),data=mb_xylosus)
summary(model_1)
## 
## Call:
## glm(formula = aip_2 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_xylosus)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)        1.0986     0.8165   1.346    0.178
## case_controlCase  -0.6931     1.0408  -0.666    0.505
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 22.915  on 17  degrees of freedom
## Residual deviance: 22.458  on 16  degrees of freedom
## AIC: 26.458
## 
## Number of Fisher Scoring iterations: 4
exp(Confint(model_1))
##                  Estimate      2.5 %    97.5 %
## (Intercept)           3.0 0.69120194 20.475683
## case_controlCase      0.5 0.05327731  3.663896
effectsize::oddsratio_to_riskratio(model_1)
## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.75) - make sure your intercept
##   is meaningful.
## CIs are back-transformed from the logit scale.
## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.75 |             
## case control [Case] |       0.80 | [0.18, 1.22]
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
emmeans(model_1,~case_control,type="response")
##  case_control prob    SE  df asymp.LCL asymp.UCL
##  Control      0.75 0.153 Inf     0.377     0.937
##  Case         0.60 0.155 Inf     0.297     0.842
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
# AIP3

table1(~aip_3|case_control,data=mb_xylosus)
Control
(N=8)
Case
(N=10)
Overall
(N=18)
aip_3
0 6 (75.0%) 7 (70.0%) 13 (72.2%)
1 2 (25.0%) 3 (30.0%) 5 (27.8%)
model_2 <- glm(aip_3 ~ case_control,family=binomial(link="logit"),data=mb_xylosus)
summary(model_2)
## 
## Call:
## glm(formula = aip_3 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_xylosus)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)       -1.0986     0.8165  -1.346    0.178
## case_controlCase   0.2513     1.0690   0.235    0.814
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 21.270  on 17  degrees of freedom
## Residual deviance: 21.215  on 16  degrees of freedom
## AIC: 25.215
## 
## Number of Fisher Scoring iterations: 4
exp(Confint(model_2))
##                   Estimate      2.5 %    97.5 %
## (Intercept)      0.3333333 0.04883842  1.446755
## case_controlCase 1.2857143 0.15782429 12.403957
effectsize::oddsratio_to_riskratio(model_2)
## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.25) - make sure your intercept
##   is meaningful.
## CIs are back-transformed from the logit scale.
## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.25 |             
## case control [Case] |       1.20 | [0.20, 3.22]
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
emmeans(model_2,~case_control,type="response")
##  case_control prob    SE  df asymp.LCL asymp.UCL
##  Control      0.25 0.153 Inf    0.0630     0.623
##  Case         0.30 0.145 Inf    0.0998     0.624
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
# putative bacteriocin 194

table1(~putative_bacteriocin_194|case_control,data=mb_xylosus)
Control
(N=8)
Case
(N=10)
Overall
(N=18)
putative_bacteriocin_194
0 5 (62.5%) 6 (60.0%) 11 (61.1%)
1 3 (37.5%) 4 (40.0%) 7 (38.9%)
model_3 <- glm(putative_bacteriocin_194 ~ case_control,family=binomial(link="logit"),data=mb_xylosus)
summary(model_3)
## 
## Call:
## glm(formula = putative_bacteriocin_194 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_xylosus)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)       -0.5108     0.7303  -0.699    0.484
## case_controlCase   0.1054     0.9747   0.108    0.914
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 24.057  on 17  degrees of freedom
## Residual deviance: 24.045  on 16  degrees of freedom
## AIC: 28.045
## 
## Number of Fisher Scoring iterations: 4
exp(Confint(model_3))
##                  Estimate     2.5 %   97.5 %
## (Intercept)      0.600000 0.1230568 2.445302
## case_controlCase 1.111111 0.1616472 8.069622
effectsize::oddsratio_to_riskratio(model_3)
## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.38) - make sure your intercept
##   is meaningful.
## CIs are back-transformed from the logit scale.
## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.38 |             
## case control [Case] |       1.07 | [0.24, 2.21]
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
emmeans(model_3,~case_control,type="response")
##  case_control  prob    SE  df asymp.LCL asymp.UCL
##  Control      0.375 0.171 Inf     0.125     0.715
##  Case         0.400 0.155 Inf     0.158     0.703
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
# Association between AMPs and MIC in SXYL

# AIP2

model_1 <- lm(mic_sau ~ aip_2,data=mb_xylosus)
summary(model_1)
## 
## Call:
## lm(formula = mic_sau ~ aip_2, data = mb_xylosus)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.43500 -0.25917 -0.13417  0.04438  0.94500 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.1450     0.1733   18.15 4.25e-12 ***
## aip_21       -0.1358     0.2122   -0.64    0.531    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4244 on 16 degrees of freedom
## Multiple R-squared:  0.02497,    Adjusted R-squared:  -0.03597 
## F-statistic: 0.4097 on 1 and 16 DF,  p-value: 0.5312
model_1 <- lm(mic_sub ~ aip_2,data=mb_xylosus)
summary(model_1)
## 
## Call:
## lm(formula = mic_sub ~ aip_2, data = mb_xylosus)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.26833 -0.38250 -0.06542  0.56354  1.11167 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.9783     0.2678  11.122 6.13e-09 ***
## aip_21        0.1142     0.3280   0.348    0.732    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6559 on 16 degrees of freedom
## Multiple R-squared:  0.007517,   Adjusted R-squared:  -0.05451 
## F-statistic: 0.1212 on 1 and 16 DF,  p-value: 0.7323
# AIP3

model_1 <- lm(mic_sau ~ aip_3,data=mb_xylosus)
summary(model_1)
## 
## Call:
## lm(formula = mic_sau ~ aip_3, data = mb_xylosus)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.47800 -0.25308 -0.08808  0.04442  0.92692 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.0031     0.1167  25.734  1.9e-14 ***
## aip_31        0.1849     0.2214   0.835    0.416    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4208 on 16 degrees of freedom
## Multiple R-squared:  0.04177,    Adjusted R-squared:  -0.01811 
## F-statistic: 0.6975 on 1 and 16 DF,  p-value: 0.4159
model_1 <- lm(mic_sub ~ aip_3,data=mb_xylosus)
summary(model_1)
## 
## Call:
## lm(formula = mic_sub ~ aip_3, data = mb_xylosus)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2780 -0.3700 -0.1100  0.5705  1.1020 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.0800     0.1822  16.904 1.26e-11 ***
## aip_31       -0.0920     0.3457  -0.266    0.794    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.657 on 16 degrees of freedom
## Multiple R-squared:  0.004407,   Adjusted R-squared:  -0.05782 
## F-statistic: 0.07082 on 1 and 16 DF,  p-value: 0.7935
# putative bacteriocin 194

model_2 <- lm(mic_sau ~ putative_bacteriocin_194,data=mb_xylosus)
summary(model_2)
## 
## Call:
## lm(formula = mic_sau ~ putative_bacteriocin_194, data = mb_xylosus)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41091 -0.26591 -0.17403  0.06286  0.98286 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                3.02091    0.12890  23.436  8.2e-14 ***
## putative_bacteriocin_1941  0.08623    0.20670   0.417    0.682    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4275 on 16 degrees of freedom
## Multiple R-squared:  0.01076,    Adjusted R-squared:  -0.05107 
## F-statistic: 0.1741 on 1 and 16 DF,  p-value: 0.6821
model_2 <- lm(mic_sub ~ putative_bacteriocin_194,data=mb_xylosus)
summary(model_2)
## 
## Call:
## lm(formula = mic_sub ~ putative_bacteriocin_194, data = mb_xylosus)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.25429 -0.40182 -0.05805  0.56756  1.12571 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 3.1118     0.1972  15.782 3.56e-11 ***
## putative_bacteriocin_1941  -0.1475     0.3162  -0.467    0.647    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.654 on 16 degrees of freedom
## Multiple R-squared:  0.01342,    Adjusted R-squared:  -0.04824 
## F-statistic: 0.2177 on 1 and 16 DF,  p-value: 0.6471
# clades

target_isolates <- c("SXYL1", "SXYL2", "SXYL4", "SXYL10", "SXYL11", "SXYL13")

mb_xylosus <- mb_xylosus %>%
  mutate(clade = ifelse(wgs_isolate_id %in% target_isolates, "Present_xylosus", "Absent_xylosus"))


mb_xylosus$clade <- ifelse(mb_xylosus$wgs_species_3=="SXYL",mb_xylosus$clade,"Ps_xylosus")


table1(~mic_sau + mic_sub+sau_inhibition_category+sub_inhibition_category+case_control+aip_1+aip_2+aip_3+aip_4+sactipeptides+subtilosin_a+putative_bacteriocin_193+putative_bacteriocin_194|clade,data=mb_xylosus)
Absent_xylosus
(N=7)
Present_xylosus
(N=6)
Ps_xylosus
(N=5)
Overall
(N=18)
mic_sau
Mean (SD) 2.98 (0.291) 3.14 (0.505) 3.06 (0.522) 3.05 (0.417)
Median [Min, Max] 2.85 [2.74, 3.56] 3.04 [2.71, 4.09] 3.01 [2.61, 3.93] 2.92 [2.61, 4.09]
mic_sub
Mean (SD) 3.12 (0.514) 2.97 (0.915) 3.06 (0.522) 3.05 (0.639)
Median [Min, Max] 2.93 [2.56, 3.85] 3.04 [1.71, 4.09] 3.01 [2.61, 3.93] 2.97 [1.71, 4.09]
sau_inhibition_category
11 to 20 4 (57.1%) 2 (33.3%) 1 (20.0%) 7 (38.9%)
21 to 30 1 (14.3%) 1 (16.7%) 2 (40.0%) 4 (22.2%)
31 to 40 1 (14.3%) 2 (33.3%) 0 (0%) 3 (16.7%)
41 to 50 1 (14.3%) 0 (0%) 0 (0%) 1 (5.6%)
51 to 60 0 (0%) 1 (16.7%) 1 (20.0%) 2 (11.1%)
Top 10 0 (0%) 0 (0%) 1 (20.0%) 1 (5.6%)
sub_inhibition_category
11 to 20 1 (14.3%) 0 (0%) 2 (40.0%) 3 (16.7%)
21 to 30 3 (42.9%) 1 (16.7%) 2 (40.0%) 6 (33.3%)
31 to 40 1 (14.3%) 1 (16.7%) 0 (0%) 2 (11.1%)
51 to 60 2 (28.6%) 1 (16.7%) 1 (20.0%) 4 (22.2%)
61 to 70 0 (0%) 1 (16.7%) 0 (0%) 1 (5.6%)
Top 10 0 (0%) 2 (33.3%) 0 (0%) 2 (11.1%)
case_control
Control 3 (42.9%) 3 (50.0%) 2 (40.0%) 8 (44.4%)
Case 4 (57.1%) 3 (50.0%) 3 (60.0%) 10 (55.6%)
aip_1
0 7 (100%) 6 (100%) 5 (100%) 18 (100%)
1 0 (0%) 0 (0%) 0 (0%) 0 (0%)
aip_2
0 2 (28.6%) 4 (66.7%) 0 (0%) 6 (33.3%)
1 5 (71.4%) 2 (33.3%) 5 (100%) 12 (66.7%)
aip_3
0 6 (85.7%) 2 (33.3%) 5 (100%) 13 (72.2%)
1 1 (14.3%) 4 (66.7%) 0 (0%) 5 (27.8%)
aip_4
0 6 (85.7%) 6 (100%) 5 (100%) 17 (94.4%)
1 1 (14.3%) 0 (0%) 0 (0%) 1 (5.6%)
sactipeptides
Mean (SD) 1.00 (0) 1.00 (0) 1.00 (0) 1.00 (0)
Median [Min, Max] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00]
subtilosin_a
1 0 (0%) 0 (0%) 0 (0%) 0 (0%)
0 7 (100%) 6 (100%) 5 (100%) 18 (100%)
putative_bacteriocin_193
0 7 (100%) 6 (100%) 5 (100%) 18 (100%)
1 0 (0%) 0 (0%) 0 (0%) 0 (0%)
putative_bacteriocin_194
0 6 (85.7%) 0 (0%) 5 (100%) 11 (61.1%)
1 1 (14.3%) 6 (100%) 0 (0%) 7 (38.9%)
model <- lm(mic_sau~clade,data=mb_xylosus)
summary(model)
## 
## Call:
## lm(formula = mic_sau ~ clade, data = mb_xylosus)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.45200 -0.23810 -0.09029  0.03333  0.95333 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           2.97857    0.16547  18.001 1.44e-11 ***
## cladePresent_xylosus  0.15810    0.24356   0.649    0.526    
## cladePs_xylosus       0.08343    0.25634   0.325    0.749    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4378 on 15 degrees of freedom
## Multiple R-squared:  0.02745,    Adjusted R-squared:  -0.1022 
## F-statistic: 0.2117 on 2 and 15 DF,  p-value: 0.8116
Confint(model)
##                        Estimate      2.5 %    97.5 %
## (Intercept)          2.97857143  2.6258826 3.3312603
## cladePresent_xylosus 0.15809524 -0.3610487 0.6772391
## cladePs_xylosus      0.08342857 -0.4629546 0.6298118
emmeans(model,~clade)
##  clade           emmean    SE df lower.CL upper.CL
##  Absent_xylosus    2.98 0.165 15     2.63     3.33
##  Present_xylosus   3.14 0.179 15     2.76     3.52
##  Ps_xylosus        3.06 0.196 15     2.64     3.48
## 
## Confidence level used: 0.95
Anova(model,type="III")
## Anova Table (Type III tests)
## 
## Response: mic_sau
##             Sum Sq Df  F value    Pr(>F)    
## (Intercept) 62.103  1 324.0281 1.443e-11 ***
## clade        0.081  2   0.2117    0.8116    
## Residuals    2.875 15                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model <- lm(mic_sub~clade,data=mb_xylosus)
summary(model)
## 
## Call:
## lm(formula = mic_sub ~ clade, data = mb_xylosus)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2600 -0.3766 -0.0610  0.5589  1.1200 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           3.12143    0.25563  12.211 3.41e-09 ***
## cladePresent_xylosus -0.15143    0.37628  -0.402    0.693    
## cladePs_xylosus      -0.05943    0.39602  -0.150    0.883    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6763 on 15 degrees of freedom
## Multiple R-squared:  0.01074,    Adjusted R-squared:  -0.1212 
## F-statistic: 0.08141 on 2 and 15 DF,  p-value: 0.9222
Confint(model)
##                         Estimate      2.5 %    97.5 %
## (Intercept)           3.12142857  2.5765597 3.6662975
## cladePresent_xylosus -0.15142857 -0.9534539 0.6505967
## cladePs_xylosus      -0.05942857 -0.9035358 0.7846787
emmeans(model,~clade)
##  clade           emmean    SE df lower.CL upper.CL
##  Absent_xylosus    3.12 0.256 15     2.58     3.67
##  Present_xylosus   2.97 0.276 15     2.38     3.56
##  Ps_xylosus        3.06 0.302 15     2.42     3.71
## 
## Confidence level used: 0.95
Anova(model,type="III")
## Anova Table (Type III tests)
## 
## Response: mic_sub
##             Sum Sq Df  F value    Pr(>F)    
## (Intercept) 68.203  1 149.0984 3.407e-09 ***
## clade        0.074  2   0.0814    0.9222    
## Residuals    6.862 15                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# AIP2

table1(~aip_2|clade,data=mb_xylosus)
Absent_xylosus
(N=7)
Present_xylosus
(N=6)
Ps_xylosus
(N=5)
Overall
(N=18)
aip_2
0 2 (28.6%) 4 (66.7%) 0 (0%) 6 (33.3%)
1 5 (71.4%) 2 (33.3%) 5 (100%) 12 (66.7%)
fisher.test(table(mb_xylosus$aip_2,mb_xylosus$clade))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_xylosus$aip_2, mb_xylosus$clade)
## p-value = 0.07428
## alternative hypothesis: two.sided
# AIP3

table1(~aip_3|clade,data=mb_xylosus)
Absent_xylosus
(N=7)
Present_xylosus
(N=6)
Ps_xylosus
(N=5)
Overall
(N=18)
aip_3
0 6 (85.7%) 2 (33.3%) 5 (100%) 13 (72.2%)
1 1 (14.3%) 4 (66.7%) 0 (0%) 5 (27.8%)
fisher.test(table(mb_xylosus$aip_3,mb_xylosus$clade))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_xylosus$aip_3, mb_xylosus$clade)
## p-value = 0.03186
## alternative hypothesis: two.sided
# putative bacteriocin 194

table1(~putative_bacteriocin_194|clade,data=mb_xylosus)
Absent_xylosus
(N=7)
Present_xylosus
(N=6)
Ps_xylosus
(N=5)
Overall
(N=18)
putative_bacteriocin_194
0 6 (85.7%) 0 (0%) 5 (100%) 11 (61.1%)
1 1 (14.3%) 6 (100%) 0 (0%) 7 (38.9%)
fisher.test(table(mb_xylosus$putative_bacteriocin_194,mb_xylosus$clade))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_xylosus$putative_bacteriocin_194, mb_xylosus$clade)
## p-value = 0.0004085
## alternative hypothesis: two.sided

3 Virulence genes

3.1 Summary

# Number of genes identified

virulence_annotated %>% group_by(vir_gene_type) %>% summarise(n=n_distinct(vir_gene_abbreviation))
## # A tibble: 6 × 2
##   vir_gene_type              n
##   <chr>                  <int>
## 1 Adherence                 37
## 2 Enterotoxins_exotoxins    87
## 3 Exoenzymes                22
## 4 Immune_evasion            34
## 5 Iron_uptake_metabolism    29
## 6 Toxin_genes               30
virulence_annotated %>% ungroup() %>% summarise(n=n_distinct(vir_gene_nonredundant))
## # A tibble: 1 × 1
##       n
##   <int>
## 1   200
virulence_annotated %>% group_by(vir_gene_type) %>% summarise(n=n_distinct(vir_gene_nonredundant))
## # A tibble: 6 × 2
##   vir_gene_type              n
##   <chr>                  <int>
## 1 Adherence                 30
## 2 Enterotoxins_exotoxins    59
## 3 Exoenzymes                18
## 4 Immune_evasion            34
## 5 Iron_uptake_metabolism    29
## 6 Toxin_genes               30
# Adherence genes 

adherence_summary <- adherence_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")
## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.
adherence_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/37)*100)
## # A tibble: 10 × 3
##    wgs_species_3 n_genes prop_present
##    <chr>           <dbl>        <dbl>
##  1 MSC                20         54.1
##  2 SAU                30         81.1
##  3 SCH                19         51.4
##  4 SCO                 8         21.6
##  5 SDEV               17         45.9
##  6 SEQ                14         37.8
##  7 SHAEM              21         56.8
##  8 SPXYL              17         45.9
##  9 SSUC               14         37.8
## 10 SXYL               22         59.5
# Exoenzymes genes 

exoenzymes_summary <- exoenzymes_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")
## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.
exoenzymes_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/22)*100)
## # A tibble: 10 × 3
##    wgs_species_3 n_genes prop_present
##    <chr>           <dbl>        <dbl>
##  1 MSC                 6         27.3
##  2 SAU                17         77.3
##  3 SCH                 6         27.3
##  4 SCO                 7         31.8
##  5 SDEV                4         18.2
##  6 SEQ                 4         18.2
##  7 SHAEM               4         18.2
##  8 SPXYL               7         31.8
##  9 SSUC                9         40.9
## 10 SXYL                8         36.4
# Immune evasion genes

im_ev_summary <- im_ev_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")
## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.
im_ev_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/35)*100)
## # A tibble: 10 × 3
##    wgs_species_3 n_genes prop_present
##    <chr>           <dbl>        <dbl>
##  1 MSC                 8         22.9
##  2 SAU                34         97.1
##  3 SCH                30         85.7
##  4 SCO                 8         22.9
##  5 SDEV               12         34.3
##  6 SEQ                15         42.9
##  7 SHAEM              18         51.4
##  8 SPXYL               4         11.4
##  9 SSUC                4         11.4
## 10 SXYL               28         80
# Iron related genes

iron_summary <- iron_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")
## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.
iron_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/29)*100)
## # A tibble: 10 × 3
##    wgs_species_3 n_genes prop_present
##    <chr>           <dbl>        <dbl>
##  1 MSC                17         58.6
##  2 SAU                29        100  
##  3 SCH                14         48.3
##  4 SCO                10         34.5
##  5 SDEV               12         41.4
##  6 SEQ                20         69.0
##  7 SHAEM              16         55.2
##  8 SPXYL              11         37.9
##  9 SSUC               11         37.9
## 10 SXYL               10         34.5
# Enterotoxins and exotoxins genes

enterotoxin_summary <- enterotoxin_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")
## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.
enterotoxin_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/87)*100)
## # A tibble: 2 × 3
##   wgs_species_3 n_genes prop_present
##   <chr>           <dbl>        <dbl>
## 1 SAU                59         67.8
## 2 SCH                36         41.4
# Toxin genes 

toxin_summary <- toxin_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")
## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.
toxin_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/36)*100)
## # A tibble: 10 × 3
##    wgs_species_3 n_genes prop_present
##    <chr>           <dbl>        <dbl>
##  1 MSC                 3         8.33
##  2 SAU                27        75   
##  3 SCH                10        27.8 
##  4 SCO                 8        22.2 
##  5 SDEV                7        19.4 
##  6 SEQ                 6        16.7 
##  7 SHAEM               8        22.2 
##  8 SPXYL               9        25   
##  9 SSUC                7        19.4 
## 10 SXYL                8        22.2

3.2 Visualizations

3.2.1 Adherence

# ---- Adherence genes ----

adherence_sau <- adherence_long %>% filter(grepl("SAU",wgs_isolate_id)) 

adherence_plot_1 <- adherence_sau %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(adherence_sau$wgs_isolate_id,adherence_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

adherence_sch <- adherence_long %>% filter(grepl("SCH",wgs_isolate_id)) 

adherence_plot_2 <- adherence_sch %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(adherence_sch$wgs_isolate_id,adherence_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


adherence_shaem <- adherence_long %>% filter(grepl("SHAEM",wgs_isolate_id)) 


adherence_plot_3 <- adherence_shaem %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(adherence_shaem$wgs_isolate_id,adherence_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




adherence_ssc <- adherence_long %>% filter(grepl("MSC",wgs_isolate_id)) 


adherence_plot_4 <- adherence_ssc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(adherence_ssc$wgs_isolate_id,adherence_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Mammaliicoccus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())





adherence_ssuc <- adherence_long %>% filter(grepl("SSUC",wgs_isolate_id)) 




adherence_plot_5 <- adherence_ssuc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(adherence_ssuc$wgs_isolate_id,adherence_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())



adherence_ssxyl <- adherence_long %>% filter(grepl("SXYL",wgs_isolate_id)) 

adherence_ssxyl$wgs_isolate_id <- fct_reorder(adherence_ssxyl$wgs_isolate_id,adherence_ssxyl$wgs_id)

adherence_spxyl <- adherence_long %>% filter(grepl("SPXYL",wgs_isolate_id)) 

levels(adherence_long$wgs_isolate_id)
## NULL
adherence_spxyl$wgs_isolate_id <- fct_reorder(adherence_spxyl$wgs_isolate_id,adherence_spxyl$wgs_id)
             

adherence_sxyl_spxyl <- rbind(adherence_ssxyl,adherence_spxyl)                                 
#adherence_ssxyl$wgs_isolate_id <- fct_reorder(adherence_ssxyl$wgs_isolate_id,adherence_ssxyl$wgs_id)

table(adherence_long$wgs_isolate_id)
## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      30      30      30      30      30      30      30      30      30      30 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      30      30      30      30      30      30      30      30      30      30 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      30      30      30      30      30      30      30      30      30      30 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      30      30      30      30      30      30      30      30      30      30 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      30      30      30      30      30      30      30      30      30      30 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      30      30      30      30      30      30      30      30      30      30 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      30      30      30      30      30      30      30      30      30      30 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      30      30      30      30      30      30      30      30      30      30 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      30      30      30      30
adherence_plot_6 <- adherence_sxyl_spxyl %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


adherence_sco <- adherence_long %>% filter(grepl("SCO",wgs_isolate_id)) 

adherence_sco$wgs_isolate_id <- fct_reorder(adherence_sco$wgs_isolate_id,adherence_sco$wgs_id)

adherence_sdev <- adherence_long %>% filter(grepl("SDEV",wgs_isolate_id)) 

levels(adherence_long$wgs_isolate_id)
## NULL
adherence_sdev$wgs_isolate_id <- fct_reorder(adherence_sdev$wgs_isolate_id,adherence_sdev$wgs_id)



adherence_seq <- adherence_long %>% filter(grepl("SEQ",wgs_isolate_id)) 

adherence_seq$wgs_isolate_id <- fct_reorder(adherence_seq$wgs_isolate_id,adherence_seq$wgs_id)


adherence_other <- rbind(adherence_sco,adherence_sdev,adherence_seq)                                 

table(adherence_long$wgs_isolate_id)
## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      30      30      30      30      30      30      30      30      30      30 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      30      30      30      30      30      30      30      30      30      30 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      30      30      30      30      30      30      30      30      30      30 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      30      30      30      30      30      30      30      30      30      30 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      30      30      30      30      30      30      30      30      30      30 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      30      30      30      30      30      30      30      30      30      30 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      30      30      30      30      30      30      30      30      30      30 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      30      30      30      30      30      30      30      30      30      30 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      30      30      30      30
adherence_plot_7 <- adherence_other %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Adherence",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),,axis.text.x = element_text(angle = 45, hjust = 1))




# Multipanel figure

adherence_plot <- ggarrange(adherence_plot_1 + rremove("xlab")+ rremove("ylab"),adherence_plot_2 + rremove("xlab")+ rremove("ylab"),adherence_plot_3 + rremove("xlab")+ rremove("ylab"),adherence_plot_4 + rremove("xlab")+ rremove("ylab"),adherence_plot_5 + rremove("xlab")+ rremove("ylab"),adherence_plot_6 + rremove("xlab")+ rremove("ylab"),adherence_plot_7 + rremove("xlab")+ rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2.25))

annotate_figure(adherence_plot,
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/adherence.png",width = 20, height = 40, units = "cm")
ggsave(plot = last_plot(),"./figures/adherence.pdf",width = 20, height = 40, units = "cm")

3.2.2 Exoenzymes

# ---- exoenzymes genes ----

exoenzymes_sau <- exoenzymes_long %>% filter(grepl("SAU",wgs_isolate_id)) 


exoenzymes_plot_1 <- exoenzymes_sau %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(exoenzymes_sau$wgs_isolate_id,exoenzymes_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




exoenzymes_sch <- exoenzymes_long %>% filter(grepl("SCH",wgs_isolate_id)) 


exoenzymes_plot_2 <- exoenzymes_sch %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(exoenzymes_sch$wgs_isolate_id,exoenzymes_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


exoenzymes_shaem <- exoenzymes_long %>% filter(grepl("SHAEM",wgs_isolate_id)) 


exoenzymes_plot_3 <- exoenzymes_shaem %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(exoenzymes_shaem$wgs_isolate_id,exoenzymes_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




exoenzymes_ssc <- exoenzymes_long %>% filter(grepl("MSC",wgs_isolate_id)) 


exoenzymes_plot_4 <- exoenzymes_ssc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(exoenzymes_ssc$wgs_isolate_id,exoenzymes_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Mammaliicoccus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())





exoenzymes_ssuc <- exoenzymes_long %>% filter(grepl("SSUC",wgs_isolate_id)) 




exoenzymes_plot_5 <- exoenzymes_ssuc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(exoenzymes_ssuc$wgs_isolate_id,exoenzymes_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())



exoenzymes_ssxyl <- exoenzymes_long %>% filter(grepl("SXYL",wgs_isolate_id)) 

exoenzymes_ssxyl$wgs_isolate_id <- fct_reorder(exoenzymes_ssxyl$wgs_isolate_id,exoenzymes_ssxyl$wgs_id)

exoenzymes_spxyl <- exoenzymes_long %>% filter(grepl("SPXYL",wgs_isolate_id)) 

levels(exoenzymes_long$wgs_isolate_id)
## NULL
exoenzymes_spxyl$wgs_isolate_id <- fct_reorder(exoenzymes_spxyl$wgs_isolate_id,exoenzymes_spxyl$wgs_id)


exoenzymes_sxyl_spxyl <- rbind(exoenzymes_ssxyl,exoenzymes_spxyl)                                 
#exoenzymes_ssxyl$wgs_isolate_id <- fct_reorder(exoenzymes_ssxyl$wgs_isolate_id,exoenzymes_ssxyl$wgs_id)

table(exoenzymes_long$wgs_isolate_id)
## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      18      18      18      18      18      18      18      18      18      18 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      18      18      18      18      18      18      18      18      18      18 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      18      18      18      18      18      18      18      18      18      18 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      18      18      18      18      18      18      18      18      18      18 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      18      18      18      18      18      18      18      18      18      18 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      18      18      18      18      18      18      18      18      18      18 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      18      18      18      18      18      18      18      18      18      18 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      18      18      18      18      18      18      18      18      18      18 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      18      18      18      18
exoenzymes_plot_6 <- exoenzymes_sxyl_spxyl %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


exoenzymes_sco <- exoenzymes_long %>% filter(grepl("SCO",wgs_isolate_id)) 

exoenzymes_sco$wgs_isolate_id <- fct_reorder(exoenzymes_sco$wgs_isolate_id,exoenzymes_sco$wgs_id)

exoenzymes_sdev <- exoenzymes_long %>% filter(grepl("SDEV",wgs_isolate_id)) 

levels(exoenzymes_long$wgs_isolate_id)
## NULL
exoenzymes_sdev$wgs_isolate_id <- fct_reorder(exoenzymes_sdev$wgs_isolate_id,exoenzymes_sdev$wgs_id)



exoenzymes_seq <- exoenzymes_long %>% filter(grepl("SEQ",wgs_isolate_id)) 

exoenzymes_seq$wgs_isolate_id <- fct_reorder(exoenzymes_seq$wgs_isolate_id,exoenzymes_seq$wgs_id)


exoenzymes_other <- rbind(exoenzymes_sco,exoenzymes_sdev,exoenzymes_seq)                                 

table(exoenzymes_long$wgs_isolate_id)
## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      18      18      18      18      18      18      18      18      18      18 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      18      18      18      18      18      18      18      18      18      18 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      18      18      18      18      18      18      18      18      18      18 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      18      18      18      18      18      18      18      18      18      18 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      18      18      18      18      18      18      18      18      18      18 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      18      18      18      18      18      18      18      18      18      18 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      18      18      18      18      18      18      18      18      18      18 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      18      18      18      18      18      18      18      18      18      18 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      18      18      18      18
exoenzymes_plot_7 <- exoenzymes_other %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Exoenzymes",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x = element_text(angle = 45, hjust = 1))




# Multipanel figure

exoenzymes_plot <- ggarrange(exoenzymes_plot_1 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_2 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_3 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_4 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_5 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_6 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_7 + rremove("xlab")+ rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2.25))

annotate_figure(exoenzymes_plot,
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/exoenzymes.pdf",width = 20, height = 40, units = "cm")
ggsave(plot = last_plot(),"./figures/exoenzymes.png",width = 20, height = 40, units = "cm")




# plot adherence + exoenzymes

adherence_exoenzymes_plot <- ggarrange(
  adherence_plot_1 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_1 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_2 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_2 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_3 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_3 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_4 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_4 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_5 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_5 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_6 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_6 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_7 + rremove("ylab"),
  exoenzymes_plot_7 + rremove("ylab") ,
  ncol = 2, nrow = 7, common.legend = TRUE, align = "v",
  labels = c("A1", "A2", "B1", "B2", "C1", "C2", "D1", "D2", "E1", "E2", "F1", "F2", "G1", "G2"),
  heights = c(1.25,4.5, 4.25, 1.75,3,3.5, 2.25)
)

annotate_figure(adherence_exoenzymes_plot,
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/figure_8.pdf",width = 40, height = 40, units = "cm",dpi=600)
ggsave(plot = last_plot(),"./figures/figure_8.png",width = 40, height = 40, units = "cm",dpi=600)
ggsave(plot = last_plot(),"./figures/figure_8.jpeg",width = 40, height = 40, units = "cm",dpi=600)

3.2.3 Immune evasion

# ---- im_ev genes ----

im_ev_sau <- im_ev_long %>% filter(grepl("SAU",wgs_isolate_id)) 


im_ev_plot_1 <- im_ev_sau %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(im_ev_sau$wgs_isolate_id,im_ev_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




im_ev_sch <- im_ev_long %>% filter(grepl("SCH",wgs_isolate_id)) 


im_ev_plot_2 <- im_ev_sch %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(im_ev_sch$wgs_isolate_id,im_ev_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


im_ev_shaem <- im_ev_long %>% filter(grepl("SHAEM",wgs_isolate_id)) 


im_ev_plot_3 <- im_ev_shaem %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(im_ev_shaem$wgs_isolate_id,im_ev_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




im_ev_ssc <- im_ev_long %>% filter(grepl("MSC",wgs_isolate_id)) 


im_ev_plot_4 <- im_ev_ssc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(im_ev_ssc$wgs_isolate_id,im_ev_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Mammaliicoccus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())





im_ev_ssuc <- im_ev_long %>% filter(grepl("SSUC",wgs_isolate_id)) 




im_ev_plot_5 <- im_ev_ssuc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(im_ev_ssuc$wgs_isolate_id,im_ev_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())



im_ev_ssxyl <- im_ev_long %>% filter(grepl("SXYL",wgs_isolate_id)) 

im_ev_ssxyl$wgs_isolate_id <- fct_reorder(im_ev_ssxyl$wgs_isolate_id,im_ev_ssxyl$wgs_id)

im_ev_spxyl <- im_ev_long %>% filter(grepl("SPXYL",wgs_isolate_id)) 

levels(im_ev_long$wgs_isolate_id)
## NULL
im_ev_spxyl$wgs_isolate_id <- fct_reorder(im_ev_spxyl$wgs_isolate_id,im_ev_spxyl$wgs_id)


im_ev_sxyl_spxyl <- rbind(im_ev_ssxyl,im_ev_spxyl)                                 
#im_ev_ssxyl$wgs_isolate_id <- fct_reorder(im_ev_ssxyl$wgs_isolate_id,im_ev_ssxyl$wgs_id)

table(im_ev_long$wgs_isolate_id)
## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      34      34      34      34      34      34      34      34      34      34 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      34      34      34      34      34      34      34      34      34      34 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      34      34      34      34      34      34      34      34      34      34 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      34      34      34      34      34      34      34      34      34      34 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      34      34      34      34      34      34      34      34      34      34 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      34      34      34      34      34      34      34      34      34      34 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      34      34      34      34      34      34      34      34      34      34 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      34      34      34      34      34      34      34      34      34      34 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      34      34      34      34
im_ev_plot_6 <- im_ev_sxyl_spxyl %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


im_ev_sco <- im_ev_long %>% filter(grepl("SCO",wgs_isolate_id)) 

im_ev_sco$wgs_isolate_id <- fct_reorder(im_ev_sco$wgs_isolate_id,im_ev_sco$wgs_id)

im_ev_sdev <- im_ev_long %>% filter(grepl("SDEV",wgs_isolate_id)) 

levels(im_ev_long$wgs_isolate_id)
## NULL
im_ev_sdev$wgs_isolate_id <- fct_reorder(im_ev_sdev$wgs_isolate_id,im_ev_sdev$wgs_id)



im_ev_seq <- im_ev_long %>% filter(grepl("SEQ",wgs_isolate_id)) 

im_ev_seq$wgs_isolate_id <- fct_reorder(im_ev_seq$wgs_isolate_id,im_ev_seq$wgs_id)


im_ev_other <- rbind(im_ev_sco,im_ev_sdev,im_ev_seq)                                 

table(im_ev_long$wgs_isolate_id)
## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      34      34      34      34      34      34      34      34      34      34 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      34      34      34      34      34      34      34      34      34      34 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      34      34      34      34      34      34      34      34      34      34 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      34      34      34      34      34      34      34      34      34      34 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      34      34      34      34      34      34      34      34      34      34 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      34      34      34      34      34      34      34      34      34      34 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      34      34      34      34      34      34      34      34      34      34 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      34      34      34      34      34      34      34      34      34      34 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      34      34      34      34
im_ev_plot_7 <- im_ev_other %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Immune evasion",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x = element_text(angle = 45, hjust = 1))




# Multipanel figure

im_ev_plot <- ggarrange(im_ev_plot_1 + rremove("xlab")+ rremove("ylab"),im_ev_plot_2 + rremove("xlab")+ rremove("ylab"),im_ev_plot_3 + rremove("xlab")+ rremove("ylab"),im_ev_plot_4 + rremove("xlab")+ rremove("ylab"),im_ev_plot_5 + rremove("xlab")+ rremove("ylab"),im_ev_plot_6 + rremove("xlab")+ rremove("ylab"),im_ev_plot_7 + rremove("xlab")+ rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2.25))

annotate_figure(im_ev_plot,
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/im_ev.pdf",width = 20, height = 40, units = "cm")
ggsave(plot = last_plot(),"./figures/im_ev.png",width = 20, height = 40, units = "cm")

3.2.5 Enterotoxins and exotoxins

# ---- enterotoxin genes ----

enterotoxin_sau <- enterotoxin_long %>% filter(grepl("SAU",wgs_isolate_id)) 


enterotoxin_plot_1 <- enterotoxin_sau %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(enterotoxin_sau$wgs_isolate_id,enterotoxin_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

enterotoxin_sch <- enterotoxin_long %>% filter(grepl("SCH",wgs_isolate_id)) 


enterotoxin_plot_2 <- enterotoxin_sch %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(enterotoxin_sch$wgs_isolate_id,enterotoxin_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


enterotoxin_shaem <- enterotoxin_long %>% filter(grepl("SHAEM",wgs_isolate_id)) 


enterotoxin_plot_3 <- enterotoxin_shaem %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(enterotoxin_shaem$wgs_isolate_id,enterotoxin_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

enterotoxin_ssc <- enterotoxin_long %>% filter(grepl("MSC",wgs_isolate_id)) 

enterotoxin_plot_4 <- enterotoxin_ssc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(enterotoxin_ssc$wgs_isolate_id,enterotoxin_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Mammaliicoccus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

enterotoxin_ssuc <- enterotoxin_long %>% filter(grepl("SSUC",wgs_isolate_id)) 

enterotoxin_plot_5 <- enterotoxin_ssuc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(enterotoxin_ssuc$wgs_isolate_id,enterotoxin_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

enterotoxin_ssxyl <- enterotoxin_long %>% filter(grepl("SXYL",wgs_isolate_id)) 

enterotoxin_ssxyl$wgs_isolate_id <- fct_reorder(enterotoxin_ssxyl$wgs_isolate_id,enterotoxin_ssxyl$wgs_id)

enterotoxin_spxyl <- enterotoxin_long %>% filter(grepl("SPXYL",wgs_isolate_id)) 

levels(enterotoxin_long$wgs_isolate_id)
## NULL
enterotoxin_spxyl$wgs_isolate_id <- fct_reorder(enterotoxin_spxyl$wgs_isolate_id,enterotoxin_spxyl$wgs_id)


enterotoxin_sxyl_spxyl <- rbind(enterotoxin_ssxyl,enterotoxin_spxyl)                                 
#enterotoxin_ssxyl$wgs_isolate_id <- fct_reorder(enterotoxin_ssxyl$wgs_isolate_id,enterotoxin_ssxyl$wgs_id)

table(enterotoxin_long$wgs_isolate_id)
## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      59      59      59      59      59      59      59      59      59      59 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      59      59      59      59      59      59      59      59      59      59 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      59      59      59      59      59      59      59      59      59      59 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      59      59      59      59      59      59      59      59      59      59 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      59      59      59      59      59      59      59      59      59      59 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      59      59      59      59      59      59      59      59      59      59 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      59      59      59      59      59      59      59      59      59      59 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      59      59      59      59      59      59      59      59      59      59 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      59      59      59      59
enterotoxin_plot_6 <- enterotoxin_sxyl_spxyl %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


enterotoxin_sco <- enterotoxin_long %>% filter(grepl("SCO",wgs_isolate_id)) 

enterotoxin_sco$wgs_isolate_id <- fct_reorder(enterotoxin_sco$wgs_isolate_id,enterotoxin_sco$wgs_id)

enterotoxin_sdev <- enterotoxin_long %>% filter(grepl("SDEV",wgs_isolate_id)) 

levels(enterotoxin_long$wgs_isolate_id)
## NULL
enterotoxin_sdev$wgs_isolate_id <- fct_reorder(enterotoxin_sdev$wgs_isolate_id,enterotoxin_sdev$wgs_id)



enterotoxin_seq <- enterotoxin_long %>% filter(grepl("SEQ",wgs_isolate_id)) 

enterotoxin_seq$wgs_isolate_id <- fct_reorder(enterotoxin_seq$wgs_isolate_id,enterotoxin_seq$wgs_id)


enterotoxin_other <- rbind(enterotoxin_sco,enterotoxin_sdev,enterotoxin_seq)                                 

table(enterotoxin_long$wgs_isolate_id)
## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      59      59      59      59      59      59      59      59      59      59 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      59      59      59      59      59      59      59      59      59      59 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      59      59      59      59      59      59      59      59      59      59 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      59      59      59      59      59      59      59      59      59      59 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      59      59      59      59      59      59      59      59      59      59 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      59      59      59      59      59      59      59      59      59      59 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      59      59      59      59      59      59      59      59      59      59 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      59      59      59      59      59      59      59      59      59      59 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      59      59      59      59
enterotoxin_plot_7 <- enterotoxin_other %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Enterotoxins & exotoxins",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x = element_text(angle = 45, hjust = 1))




# Multipanel figure

enterotoxin_plot <- ggarrange(enterotoxin_plot_1 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_2 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_3 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_4 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_5 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_6 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_7 + rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2.25))

annotate_figure(enterotoxin_plot,
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/enterotoxin.pdf",width = 50, height = 40, units = "cm")
ggsave(plot = last_plot(),"./figures/enterotoxin.png",width = 50, height = 40, units = "cm")

3.2.6 Other toxins

# ---- toxin genes ----


toxin_sau <- toxin_long %>% filter(grepl("SAU",wgs_isolate_id)) 


toxin_plot_1 <- toxin_sau %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(toxin_sau$wgs_isolate_id,toxin_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




toxin_sch <- toxin_long %>% filter(grepl("SCH",wgs_isolate_id)) 


toxin_plot_2 <- toxin_sch %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(toxin_sch$wgs_isolate_id,toxin_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


toxin_shaem <- toxin_long %>% filter(grepl("SHAEM",wgs_isolate_id)) 


toxin_plot_3 <- toxin_shaem %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(toxin_shaem$wgs_isolate_id,toxin_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




toxin_ssc <- toxin_long %>% filter(grepl("MSC",wgs_isolate_id)) 


toxin_plot_4 <- toxin_ssc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(toxin_ssc$wgs_isolate_id,toxin_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Mammaliicoccus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())





toxin_ssuc <- toxin_long %>% filter(grepl("SSUC",wgs_isolate_id)) 




toxin_plot_5 <- toxin_ssuc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(toxin_ssuc$wgs_isolate_id,toxin_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())



toxin_ssxyl <- toxin_long %>% filter(grepl("SXYL",wgs_isolate_id)) 

toxin_ssxyl$wgs_isolate_id <- fct_reorder(toxin_ssxyl$wgs_isolate_id,toxin_ssxyl$wgs_id)

toxin_spxyl <- toxin_long %>% filter(grepl("SPXYL",wgs_isolate_id)) 

levels(toxin_long$wgs_isolate_id)
## NULL
toxin_spxyl$wgs_isolate_id <- fct_reorder(toxin_spxyl$wgs_isolate_id,toxin_spxyl$wgs_id)


toxin_sxyl_spxyl <- rbind(toxin_ssxyl,toxin_spxyl)                                 
#toxin_ssxyl$wgs_isolate_id <- fct_reorder(toxin_ssxyl$wgs_isolate_id,toxin_ssxyl$wgs_id)

table(toxin_long$wgs_isolate_id)
## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      30      30      30      30      30      30      30      30      30      30 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      30      30      30      30      30      30      30      30      30      30 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      30      30      30      30      30      30      30      30      30      30 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      30      30      30      30      30      30      30      30      30      30 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      30      30      30      30      30      30      30      30      30      30 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      30      30      30      30      30      30      30      30      30      30 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      30      30      30      30      30      30      30      30      30      30 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      30      30      30      30      30      30      30      30      30      30 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      30      30      30      30
toxin_plot_6 <- toxin_sxyl_spxyl %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


toxin_sco <- toxin_long %>% filter(grepl("SCO",wgs_isolate_id)) 

toxin_sco$wgs_isolate_id <- fct_reorder(toxin_sco$wgs_isolate_id,toxin_sco$wgs_id)

toxin_sdev <- toxin_long %>% filter(grepl("SDEV",wgs_isolate_id)) 

levels(toxin_long$wgs_isolate_id)
## NULL
toxin_sdev$wgs_isolate_id <- fct_reorder(toxin_sdev$wgs_isolate_id,toxin_sdev$wgs_id)



toxin_seq <- toxin_long %>% filter(grepl("SEQ",wgs_isolate_id)) 

toxin_seq$wgs_isolate_id <- fct_reorder(toxin_seq$wgs_isolate_id,toxin_seq$wgs_id)


toxin_other <- rbind(toxin_sco,toxin_sdev,toxin_seq)                                 

table(toxin_long$wgs_isolate_id)
## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      30      30      30      30      30      30      30      30      30      30 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      30      30      30      30      30      30      30      30      30      30 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      30      30      30      30      30      30      30      30      30      30 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      30      30      30      30      30      30      30      30      30      30 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      30      30      30      30      30      30      30      30      30      30 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      30      30      30      30      30      30      30      30      30      30 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      30      30      30      30      30      30      30      30      30      30 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      30      30      30      30      30      30      30      30      30      30 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      30      30      30      30
toxin_plot_7 <- toxin_other %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Other toxins",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x = element_text(angle = 45, hjust = 1))




# Multipanel figure

toxin_plot <- ggarrange(toxin_plot_1 + rremove("xlab")+ rremove("ylab"),toxin_plot_2 + rremove("xlab")+ rremove("ylab"),toxin_plot_3 + rremove("xlab")+ rremove("ylab"),toxin_plot_4 + rremove("xlab")+ rremove("ylab"),toxin_plot_5 + rremove("xlab")+ rremove("ylab"),toxin_plot_6 + rremove("xlab")+ rremove("ylab"),toxin_plot_7 + rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2.25))

annotate_figure(toxin_plot,
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/toxin.pdf",width = 20, height = 40, units = "cm")
ggsave(plot = last_plot(),"./figures/toxin.png",width = 20, height = 40, units = "cm")


enterotoxin_toxin_plot <- ggarrange(
  enterotoxin_plot_1 + rremove("xlab") + rremove("ylab"),
  toxin_plot_1 + rremove("xlab") + rremove("ylab") ,
  enterotoxin_plot_2 + rremove("xlab") + rremove("ylab"),
  toxin_plot_2 + rremove("xlab") + rremove("ylab") ,
  enterotoxin_plot_3 + rremove("xlab") + rremove("ylab"),
  toxin_plot_3 + rremove("xlab") + rremove("ylab") ,
  enterotoxin_plot_4 + rremove("xlab") + rremove("ylab"),
  toxin_plot_4 + rremove("xlab") + rremove("ylab") ,
  enterotoxin_plot_5 + rremove("xlab") + rremove("ylab"),
  toxin_plot_5 + rremove("xlab") + rremove("ylab") ,
  enterotoxin_plot_6 + rremove("xlab") + rremove("ylab"),
  toxin_plot_6 + rremove("xlab") + rremove("ylab") ,
  enterotoxin_plot_7 + rremove("ylab"),
  toxin_plot_7 + rremove("ylab") ,
  ncol = 2, nrow = 7, common.legend = TRUE, align = "v",
  labels = c("A1", "A2", "B1", "B2", "C1", "C2", "D1", "D2", "E1", "E2", "F1", "F2", "G1", "G2"),
  heights = c(1.25,4.5, 4.25, 1.75,3,3.5, 2.25))

annotate_figure(enterotoxin_toxin_plot,
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/figure_10.pdf",width = 50, height = 40, units = "cm",dpi=600)
ggsave(plot = last_plot(),"./figures/figure_10.png",width = 50, height = 40, units = "cm",dpi=600)
ggsave(plot = last_plot(),"./figures/figure_10.jpeg",width = 50, height = 40, units = "cm",dpi=600)

4 AMR genes and plasmids

4.1 Summary

# Presence of AMR related genes

amr_long$sau <- ifelse(amr_long$wgs_species_3 == "SAU", 1, 0)

amr_long <- amr_long %>% filter(wgs_species_3!="SUB")

amr_long_summary <- amr_long %>% group_by(gene_abbreviation,wgs_species_3) %>% summarise(n=max(presence)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")
## `summarise()` has grouped output by 'gene_abbreviation'. You can override using
## the `.groups` argument.
amr_long_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n))
## # A tibble: 10 × 2
##    wgs_species_3 n_genes
##    <chr>           <dbl>
##  1 MSC                 4
##  2 SAU                15
##  3 SCH                 1
##  4 SCO                 6
##  5 SDEV                3
##  6 SEQ                 2
##  7 SHAEM               4
##  8 SPXYL               6
##  9 SSUC                1
## 10 SXYL                4
table1(~presence_gene|gene_abbreviation,data=amr_long)
## Warning in .table1.internal(x = x, labels = labels, groupspan = groupspan, :
## Table has 29 columns. Are you sure this is what you want?
AAC3
(N=83)
ANT9
(N=83)
APH3-PRIME
(N=83)
ARLR
(N=83)
ARLS
(N=83)
BLAZ
(N=83)
DHAP
(N=83)
ERM
(N=83)
FOSB
(N=83)
FOSD
(N=83)
FUSF
(N=83)
LMRS
(N=83)
LNUA
(N=83)
MECA
(N=83)
MECC
(N=83)
MECI
(N=83)
MEPA
(N=83)
MEPB
(N=83)
MEPR
(N=83)
MGRA
(N=83)
MPHC
(N=83)
MSRA
(N=83)
NORA
(N=83)
NORB
(N=83)
RLMH
(N=83)
SALA
(N=83)
TET38
(N=83)
TETK
(N=83)
Overall
(N=2324)
presence_gene
No 80 (96.4%) 82 (98.8%) 41 (49.4%) 80 (96.4%) 80 (96.4%) 78 (94.0%) 80 (96.4%) 78 (94.0%) 81 (97.6%) 81 (97.6%) 82 (98.8%) 80 (96.4%) 77 (92.8%) 77 (92.8%) 78 (94.0%) 78 (94.0%) 80 (96.4%) 80 (96.4%) 80 (96.4%) 59 (71.1%) 69 (83.1%) 82 (98.8%) 80 (96.4%) 80 (96.4%) 39 (47.0%) 77 (92.8%) 80 (96.4%) 82 (98.8%) 2121 (91.3%)
Yes 3 (3.6%) 1 (1.2%) 42 (50.6%) 3 (3.6%) 3 (3.6%) 5 (6.0%) 3 (3.6%) 5 (6.0%) 2 (2.4%) 2 (2.4%) 1 (1.2%) 3 (3.6%) 6 (7.2%) 6 (7.2%) 5 (6.0%) 5 (6.0%) 3 (3.6%) 3 (3.6%) 3 (3.6%) 24 (28.9%) 14 (16.9%) 1 (1.2%) 3 (3.6%) 3 (3.6%) 44 (53.0%) 6 (7.2%) 3 (3.6%) 1 (1.2%) 203 (8.7%)
table1(~presence_gene|wgs_species_3:gene_abbreviation,data=amr_long)
## Warning in .table1.internal(x = x, labels = labels, groupspan = groupspan, :
## Table has 308 columns. Are you sure this is what you want?
MSC
SAU
SCH
SCO
SDEV
SEQ
SHAEM
SPXYL
SSUC
SXYL
Overall
AAC3
(N=6)
ANT9
(N=6)
APH3-PRIME
(N=6)
ARLR
(N=6)
ARLS
(N=6)
BLAZ
(N=6)
DHAP
(N=6)
ERM
(N=6)
FOSB
(N=6)
FOSD
(N=6)
FUSF
(N=6)
LMRS
(N=6)
LNUA
(N=6)
MECA
(N=6)
MECC
(N=6)
MECI
(N=6)
MEPA
(N=6)
MEPB
(N=6)
MEPR
(N=6)
MGRA
(N=6)
MPHC
(N=6)
MSRA
(N=6)
NORA
(N=6)
NORB
(N=6)
RLMH
(N=6)
SALA
(N=6)
TET38
(N=6)
TETK
(N=6)
AAC3
(N=3)
ANT9
(N=3)
APH3-PRIME
(N=3)
ARLR
(N=3)
ARLS
(N=3)
BLAZ
(N=3)
DHAP
(N=3)
ERM
(N=3)
FOSB
(N=3)
FOSD
(N=3)
FUSF
(N=3)
LMRS
(N=3)
LNUA
(N=3)
MECA
(N=3)
MECC
(N=3)
MECI
(N=3)
MEPA
(N=3)
MEPB
(N=3)
MEPR
(N=3)
MGRA
(N=3)
MPHC
(N=3)
MSRA
(N=3)
NORA
(N=3)
NORB
(N=3)
RLMH
(N=3)
SALA
(N=3)
TET38
(N=3)
TETK
(N=3)
AAC3
(N=21)
ANT9
(N=21)
APH3-PRIME
(N=21)
ARLR
(N=21)
ARLS
(N=21)
BLAZ
(N=21)
DHAP
(N=21)
ERM
(N=21)
FOSB
(N=21)
FOSD
(N=21)
FUSF
(N=21)
LMRS
(N=21)
LNUA
(N=21)
MECA
(N=21)
MECC
(N=21)
MECI
(N=21)
MEPA
(N=21)
MEPB
(N=21)
MEPR
(N=21)
MGRA
(N=21)
MPHC
(N=21)
MSRA
(N=21)
NORA
(N=21)
NORB
(N=21)
RLMH
(N=21)
SALA
(N=21)
TET38
(N=21)
TETK
(N=21)
AAC3
(N=1)
ANT9
(N=1)
APH3-PRIME
(N=1)
ARLR
(N=1)
ARLS
(N=1)
BLAZ
(N=1)
DHAP
(N=1)
ERM
(N=1)
FOSB
(N=1)
FOSD
(N=1)
FUSF
(N=1)
LMRS
(N=1)
LNUA
(N=1)
MECA
(N=1)
MECC
(N=1)
MECI
(N=1)
MEPA
(N=1)
MEPB
(N=1)
MEPR
(N=1)
MGRA
(N=1)
MPHC
(N=1)
MSRA
(N=1)
NORA
(N=1)
NORB
(N=1)
RLMH
(N=1)
SALA
(N=1)
TET38
(N=1)
TETK
(N=1)
AAC3
(N=2)
ANT9
(N=2)
APH3-PRIME
(N=2)
ARLR
(N=2)
ARLS
(N=2)
BLAZ
(N=2)
DHAP
(N=2)
ERM
(N=2)
FOSB
(N=2)
FOSD
(N=2)
FUSF
(N=2)
LMRS
(N=2)
LNUA
(N=2)
MECA
(N=2)
MECC
(N=2)
MECI
(N=2)
MEPA
(N=2)
MEPB
(N=2)
MEPR
(N=2)
MGRA
(N=2)
MPHC
(N=2)
MSRA
(N=2)
NORA
(N=2)
NORB
(N=2)
RLMH
(N=2)
SALA
(N=2)
TET38
(N=2)
TETK
(N=2)
AAC3
(N=1)
ANT9
(N=1)
APH3-PRIME
(N=1)
ARLR
(N=1)
ARLS
(N=1)
BLAZ
(N=1)
DHAP
(N=1)
ERM
(N=1)
FOSB
(N=1)
FOSD
(N=1)
FUSF
(N=1)
LMRS
(N=1)
LNUA
(N=1)
MECA
(N=1)
MECC
(N=1)
MECI
(N=1)
MEPA
(N=1)
MEPB
(N=1)
MEPR
(N=1)
MGRA
(N=1)
MPHC
(N=1)
MSRA
(N=1)
NORA
(N=1)
NORB
(N=1)
RLMH
(N=1)
SALA
(N=1)
TET38
(N=1)
TETK
(N=1)
AAC3
(N=19)
ANT9
(N=19)
APH3-PRIME
(N=19)
ARLR
(N=19)
ARLS
(N=19)
BLAZ
(N=19)
DHAP
(N=19)
ERM
(N=19)
FOSB
(N=19)
FOSD
(N=19)
FUSF
(N=19)
LMRS
(N=19)
LNUA
(N=19)
MECA
(N=19)
MECC
(N=19)
MECI
(N=19)
MEPA
(N=19)
MEPB
(N=19)
MEPR
(N=19)
MGRA
(N=19)
MPHC
(N=19)
MSRA
(N=19)
NORA
(N=19)
NORB
(N=19)
RLMH
(N=19)
SALA
(N=19)
TET38
(N=19)
TETK
(N=19)
AAC3
(N=5)
ANT9
(N=5)
APH3-PRIME
(N=5)
ARLR
(N=5)
ARLS
(N=5)
BLAZ
(N=5)
DHAP
(N=5)
ERM
(N=5)
FOSB
(N=5)
FOSD
(N=5)
FUSF
(N=5)
LMRS
(N=5)
LNUA
(N=5)
MECA
(N=5)
MECC
(N=5)
MECI
(N=5)
MEPA
(N=5)
MEPB
(N=5)
MEPR
(N=5)
MGRA
(N=5)
MPHC
(N=5)
MSRA
(N=5)
NORA
(N=5)
NORB
(N=5)
RLMH
(N=5)
SALA
(N=5)
TET38
(N=5)
TETK
(N=5)
AAC3
(N=12)
ANT9
(N=12)
APH3-PRIME
(N=12)
ARLR
(N=12)
ARLS
(N=12)
BLAZ
(N=12)
DHAP
(N=12)
ERM
(N=12)
FOSB
(N=12)
FOSD
(N=12)
FUSF
(N=12)
LMRS
(N=12)
LNUA
(N=12)
MECA
(N=12)
MECC
(N=12)
MECI
(N=12)
MEPA
(N=12)
MEPB
(N=12)
MEPR
(N=12)
MGRA
(N=12)
MPHC
(N=12)
MSRA
(N=12)
NORA
(N=12)
NORB
(N=12)
RLMH
(N=12)
SALA
(N=12)
TET38
(N=12)
TETK
(N=12)
AAC3
(N=13)
ANT9
(N=13)
APH3-PRIME
(N=13)
ARLR
(N=13)
ARLS
(N=13)
BLAZ
(N=13)
DHAP
(N=13)
ERM
(N=13)
FOSB
(N=13)
FOSD
(N=13)
FUSF
(N=13)
LMRS
(N=13)
LNUA
(N=13)
MECA
(N=13)
MECC
(N=13)
MECI
(N=13)
MEPA
(N=13)
MEPB
(N=13)
MEPR
(N=13)
MGRA
(N=13)
MPHC
(N=13)
MSRA
(N=13)
NORA
(N=13)
NORB
(N=13)
RLMH
(N=13)
SALA
(N=13)
TET38
(N=13)
TETK
(N=13)
AAC3
(N=83)
ANT9
(N=83)
APH3-PRIME
(N=83)
ARLR
(N=83)
ARLS
(N=83)
BLAZ
(N=83)
DHAP
(N=83)
ERM
(N=83)
FOSB
(N=83)
FOSD
(N=83)
FUSF
(N=83)
LMRS
(N=83)
LNUA
(N=83)
MECA
(N=83)
MECC
(N=83)
MECI
(N=83)
MEPA
(N=83)
MEPB
(N=83)
MEPR
(N=83)
MGRA
(N=83)
MPHC
(N=83)
MSRA
(N=83)
NORA
(N=83)
NORB
(N=83)
RLMH
(N=83)
SALA
(N=83)
TET38
(N=83)
TETK
(N=83)
presence_gene
No 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 5 (83.3%) 6 (100%) 6 (100%) 4 (66.7%) 0 (0%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 0 (0%) 6 (100%) 6 (100%) 0 (0%) 3 (100%) 0 (0%) 0 (0%) 0 (0%) 3 (100%) 0 (0%) 3 (100%) 2 (66.7%) 3 (100%) 3 (100%) 0 (0%) 3 (100%) 3 (100%) 3 (100%) 3 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 3 (100%) 3 (100%) 0 (0%) 0 (0%) 0 (0%) 3 (100%) 0 (0%) 3 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 1 (4.8%) 21 (100%) 21 (100%) 21 (100%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 0 (0%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 0 (0%) 2 (100%) 2 (100%) 0 (0%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 0 (0%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 0 (0%) 2 (100%) 2 (100%) 2 (100%) 1 (100%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 19 (100%) 19 (100%) 0 (0%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 16 (84.2%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 0 (0%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 0 (0%) 19 (100%) 19 (100%) 19 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 0 (0%) 5 (100%) 1 (20.0%) 4 (80.0%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 0 (0%) 0 (0%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 0 (0%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 12 (100%) 12 (100%) 0 (0%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 13 (100%) 13 (100%) 8 (61.5%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 12 (92.3%) 13 (100%) 12 (92.3%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 6 (46.2%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 80 (96.4%) 82 (98.8%) 41 (49.4%) 80 (96.4%) 80 (96.4%) 78 (94.0%) 80 (96.4%) 78 (94.0%) 81 (97.6%) 81 (97.6%) 82 (98.8%) 80 (96.4%) 77 (92.8%) 77 (92.8%) 78 (94.0%) 78 (94.0%) 80 (96.4%) 80 (96.4%) 80 (96.4%) 59 (71.1%) 69 (83.1%) 82 (98.8%) 80 (96.4%) 80 (96.4%) 39 (47.0%) 77 (92.8%) 80 (96.4%) 82 (98.8%)
Yes 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (16.7%) 0 (0%) 0 (0%) 2 (33.3%) 6 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 6 (100%) 0 (0%) 0 (0%) 3 (100%) 0 (0%) 3 (100%) 3 (100%) 3 (100%) 0 (0%) 3 (100%) 0 (0%) 1 (33.3%) 0 (0%) 0 (0%) 3 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 3 (100%) 3 (100%) 3 (100%) 3 (100%) 0 (0%) 0 (0%) 3 (100%) 3 (100%) 3 (100%) 0 (0%) 3 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 20 (95.2%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (100%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 2 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 2 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 2 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 19 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 3 (15.8%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 19 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 19 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 5 (100%) 0 (0%) 4 (80.0%) 1 (20.0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 5 (100%) 5 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 5 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 12 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 5 (38.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (7.7%) 0 (0%) 1 (7.7%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 7 (53.8%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 3 (3.6%) 1 (1.2%) 42 (50.6%) 3 (3.6%) 3 (3.6%) 5 (6.0%) 3 (3.6%) 5 (6.0%) 2 (2.4%) 2 (2.4%) 1 (1.2%) 3 (3.6%) 6 (7.2%) 6 (7.2%) 5 (6.0%) 5 (6.0%) 3 (3.6%) 3 (3.6%) 3 (3.6%) 24 (28.9%) 14 (16.9%) 1 (1.2%) 3 (3.6%) 3 (3.6%) 44 (53.0%) 6 (7.2%) 3 (3.6%) 1 (1.2%)
# Aminoglycosides

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_aminoglycosides_isolate)
MSC
(N=6)
SAU
(N=3)
SCH
(N=21)
SCO
(N=1)
SDEV
(N=2)
SEQ
(N=1)
SHAEM
(N=19)
SPXYL
(N=5)
SSUC
(N=12)
SUB
(N=1)
SXYL
(N=13)
Overall
(N=84)
presence_gene_type_isolate
0 6 (100%) 0 (0%) 21 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 5 (100%) 0 (0%) 1 (100%) 8 (61.5%) 41 (48.8%)
1 0 (0%) 3 (100%) 0 (0%) 1 (100%) 2 (100%) 1 (100%) 19 (100%) 0 (0%) 12 (100%) 0 (0%) 5 (38.5%) 43 (51.2%)
# Betalactams

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_betalactams_isolate)
MSC
(N=6)
SAU
(N=3)
SCH
(N=21)
SCO
(N=1)
SDEV
(N=2)
SEQ
(N=1)
SHAEM
(N=19)
SPXYL
(N=5)
SSUC
(N=12)
SUB
(N=1)
SXYL
(N=13)
Overall
(N=84)
presence_gene_type_isolate
0 0 (0%) 3 (100%) 21 (100%) 1 (100%) 2 (100%) 1 (100%) 19 (100%) 0 (0%) 12 (100%) 1 (100%) 13 (100%) 73 (86.9%)
1 6 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 5 (100%) 0 (0%) 0 (0%) 0 (0%) 11 (13.1%)
# Fosfomycin

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_fosfomycin_isolate)
MSC
(N=6)
SAU
(N=3)
SCH
(N=21)
SCO
(N=1)
SDEV
(N=2)
SEQ
(N=1)
SHAEM
(N=19)
SPXYL
(N=5)
SSUC
(N=12)
SUB
(N=1)
SXYL
(N=13)
Overall
(N=84)
presence_gene_type_isolate
0 5 (83.3%) 2 (66.7%) 21 (100%) 1 (100%) 2 (100%) 1 (100%) 19 (100%) 4 (80.0%) 12 (100%) 1 (100%) 12 (92.3%) 80 (95.2%)
1 1 (16.7%) 1 (33.3%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (20.0%) 0 (0%) 0 (0%) 1 (7.7%) 4 (4.8%)
# Fusidic acid

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_fusidic_acid_isolate)
MSC
(N=6)
SAU
(N=3)
SCH
(N=21)
SCO
(N=1)
SDEV
(N=2)
SEQ
(N=1)
SHAEM
(N=19)
SPXYL
(N=5)
SSUC
(N=12)
SUB
(N=1)
SXYL
(N=13)
Overall
(N=84)
presence_gene_type_isolate
0 6 (100%) 3 (100%) 21 (100%) 0 (0%) 2 (100%) 1 (100%) 19 (100%) 5 (100%) 12 (100%) 1 (100%) 13 (100%) 83 (98.8%)
1 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (1.2%)
# MLS

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_mls_isolate)
MSC
(N=6)
SAU
(N=3)
SCH
(N=21)
SCO
(N=1)
SDEV
(N=2)
SEQ
(N=1)
SHAEM
(N=19)
SPXYL
(N=5)
SSUC
(N=12)
SUB
(N=1)
SXYL
(N=13)
Overall
(N=84)
presence_gene_type_isolate
0 4 (66.7%) 0 (0%) 1 (4.8%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 12 (100%) 1 (100%) 6 (46.2%) 24 (28.6%)
1 2 (33.3%) 3 (100%) 20 (95.2%) 1 (100%) 2 (100%) 1 (100%) 19 (100%) 5 (100%) 0 (0%) 0 (0%) 7 (53.8%) 60 (71.4%)
# Multicompound resistance

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_multicompound_isolate)
MSC
(N=6)
SAU
(N=3)
SCH
(N=21)
SCO
(N=1)
SDEV
(N=2)
SEQ
(N=1)
SHAEM
(N=19)
SPXYL
(N=5)
SSUC
(N=12)
SUB
(N=1)
SXYL
(N=13)
Overall
(N=84)
presence_gene_type_isolate
0 6 (100%) 0 (0%) 21 (100%) 1 (100%) 2 (100%) 1 (100%) 19 (100%) 5 (100%) 12 (100%) 1 (100%) 13 (100%) 81 (96.4%)
1 0 (0%) 3 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 3 (3.6%)
# Multidrug resistance

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_multidrug_isolate)
MSC
(N=6)
SAU
(N=3)
SCH
(N=21)
SCO
(N=1)
SDEV
(N=2)
SEQ
(N=1)
SHAEM
(N=19)
SPXYL
(N=5)
SSUC
(N=12)
SUB
(N=1)
SXYL
(N=13)
Overall
(N=84)
presence_gene_type_isolate
0 0 (0%) 0 (0%) 21 (100%) 1 (100%) 0 (0%) 1 (100%) 0 (0%) 5 (100%) 12 (100%) 1 (100%) 13 (100%) 54 (64.3%)
1 6 (100%) 3 (100%) 0 (0%) 0 (0%) 2 (100%) 0 (0%) 19 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 30 (35.7%)
# Phenicol

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_phenicol_isolate)
MSC
(N=6)
SAU
(N=3)
SCH
(N=21)
SCO
(N=1)
SDEV
(N=2)
SEQ
(N=1)
SHAEM
(N=19)
SPXYL
(N=5)
SSUC
(N=12)
SUB
(N=1)
SXYL
(N=13)
Overall
(N=84)
presence_gene_type_isolate
0 6 (100%) 0 (0%) 21 (100%) 1 (100%) 2 (100%) 1 (100%) 19 (100%) 5 (100%) 12 (100%) 1 (100%) 13 (100%) 81 (96.4%)
1 0 (0%) 3 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 3 (3.6%)
# Tetracylines

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_tetracyclines_isolate)
MSC
(N=6)
SAU
(N=3)
SCH
(N=21)
SCO
(N=1)
SDEV
(N=2)
SEQ
(N=1)
SHAEM
(N=19)
SPXYL
(N=5)
SSUC
(N=12)
SUB
(N=1)
SXYL
(N=13)
Overall
(N=84)
presence_gene_type_isolate
0 6 (100%) 0 (0%) 21 (100%) 0 (0%) 2 (100%) 1 (100%) 19 (100%) 5 (100%) 12 (100%) 1 (100%) 13 (100%) 80 (95.2%)
1 0 (0%) 3 (100%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 4 (4.8%)
# --- Plasmids ----

plasmid_summary <- plasmid_long %>%
  group_by(wgs_isolate_id,wgs_species_3) %>%
  summarise(n_plasmids=as.factor(sum(presence)),presence_plasmid=as.factor(ifelse(sum(presence)>0,1,0)))
## `summarise()` has grouped output by 'wgs_isolate_id'. You can override using
## the `.groups` argument.
table1::table1(~n_plasmids + presence_plasmid|wgs_species_3, data= plasmid_summary)
MSC
(N=6)
SAU
(N=3)
SCH
(N=21)
SCO
(N=1)
SDEV
(N=2)
SEQ
(N=1)
SHAEM
(N=19)
SPXYL
(N=5)
SSUC
(N=12)
SXYL
(N=13)
Overall
(N=83)
n_plasmids
0 3 (50.0%) 0 (0%) 20 (95.2%) 0 (0%) 0 (0%) 0 (0%) 9 (47.4%) 0 (0%) 6 (50.0%) 0 (0%) 38 (45.8%)
1 2 (33.3%) 2 (66.7%) 1 (4.8%) 0 (0%) 1 (50.0%) 0 (0%) 5 (26.3%) 0 (0%) 4 (33.3%) 6 (46.2%) 21 (25.3%)
2 1 (16.7%) 0 (0%) 0 (0%) 0 (0%) 1 (50.0%) 1 (100%) 4 (21.1%) 5 (100%) 2 (16.7%) 6 (46.2%) 20 (24.1%)
3 0 (0%) 1 (33.3%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (5.3%) 0 (0%) 0 (0%) 1 (7.7%) 3 (3.6%)
6 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (1.2%)
presence_plasmid
0 3 (50.0%) 0 (0%) 20 (95.2%) 0 (0%) 0 (0%) 0 (0%) 9 (47.4%) 0 (0%) 6 (50.0%) 0 (0%) 38 (45.8%)
1 3 (50.0%) 3 (100%) 1 (4.8%) 1 (100%) 2 (100%) 1 (100%) 10 (52.6%) 5 (100%) 6 (50.0%) 13 (100%) 45 (54.2%)
table1::table1(~presence_gene|gene_abbreviation, data= plasmid_long)
## Warning in .table1.internal(x = x, labels = labels, groupspan = groupspan, :
## Table has 30 columns. Are you sure this is what you want?
Cassette
(N=83)
pETB
(N=83)
pKH13
(N=83)
pKH21
(N=83)
pLNU1
(N=83)
pLNU3
(N=83)
pLNU9
(N=83)
pLUG10
(N=83)
pN315
(N=83)
pRJ9
(N=83)
pS0385
(N=83)
pSaa6159
(N=83)
pSAS
(N=83)
pSE12228p05
(N=83)
pSHaeA
(N=83)
pSJH101
(N=83)
pSK1
(N=83)
pSK41
(N=83)
pSSAP2
(N=83)
pSSP1
(N=83)
pSSP2
(N=83)
pTW20
(N=83)
pvSw4
(N=83)
SAP016A
(N=83)
SAP019A
(N=83)
SAP099B
(N=166)
SAP105A
(N=83)
SAP108C
(N=83)
VRSAp
(N=83)
Overall
(N=2490)
presence_gene
No 82 (98.8%) 82 (98.8%) 82 (98.8%) 82 (98.8%) 82 (98.8%) 82 (98.8%) 78 (94.0%) 82 (98.8%) 82 (98.8%) 81 (97.6%) 82 (98.8%) 79 (95.2%) 82 (98.8%) 81 (97.6%) 82 (98.8%) 74 (89.2%) 82 (98.8%) 82 (98.8%) 73 (88.0%) 82 (98.8%) 80 (96.4%) 81 (97.6%) 74 (89.2%) 77 (92.8%) 82 (98.8%) 164 (98.8%) 79 (95.2%) 81 (97.6%) 82 (98.8%) 2414 (96.9%)
Yes 1 (1.2%) 1 (1.2%) 1 (1.2%) 1 (1.2%) 1 (1.2%) 1 (1.2%) 5 (6.0%) 1 (1.2%) 1 (1.2%) 2 (2.4%) 1 (1.2%) 4 (4.8%) 1 (1.2%) 2 (2.4%) 1 (1.2%) 9 (10.8%) 1 (1.2%) 1 (1.2%) 10 (12.0%) 1 (1.2%) 3 (3.6%) 2 (2.4%) 9 (10.8%) 6 (7.2%) 1 (1.2%) 2 (1.2%) 4 (4.8%) 2 (2.4%) 1 (1.2%) 76 (3.1%)
table1::table1(~presence_gene|wgs_species_3:gene_abbreviation, data= plasmid_long)
## Warning in .table1.internal(x = x, labels = labels, groupspan = groupspan, :
## Table has 319 columns. Are you sure this is what you want?
MSC
SAU
SCH
SCO
SDEV
SEQ
SHAEM
SPXYL
SSUC
SXYL
Overall
Cassette
(N=6)
pETB
(N=6)
pKH13
(N=6)
pKH21
(N=6)
pLNU1
(N=6)
pLNU3
(N=6)
pLNU9
(N=6)
pLUG10
(N=6)
pN315
(N=6)
pRJ9
(N=6)
pS0385
(N=6)
pSaa6159
(N=6)
pSAS
(N=6)
pSE12228p05
(N=6)
pSHaeA
(N=6)
pSJH101
(N=6)
pSK1
(N=6)
pSK41
(N=6)
pSSAP2
(N=6)
pSSP1
(N=6)
pSSP2
(N=6)
pTW20
(N=6)
pvSw4
(N=6)
SAP016A
(N=6)
SAP019A
(N=6)
SAP099B
(N=12)
SAP105A
(N=6)
SAP108C
(N=6)
VRSAp
(N=6)
Cassette
(N=3)
pETB
(N=3)
pKH13
(N=3)
pKH21
(N=3)
pLNU1
(N=3)
pLNU3
(N=3)
pLNU9
(N=3)
pLUG10
(N=3)
pN315
(N=3)
pRJ9
(N=3)
pS0385
(N=3)
pSaa6159
(N=3)
pSAS
(N=3)
pSE12228p05
(N=3)
pSHaeA
(N=3)
pSJH101
(N=3)
pSK1
(N=3)
pSK41
(N=3)
pSSAP2
(N=3)
pSSP1
(N=3)
pSSP2
(N=3)
pTW20
(N=3)
pvSw4
(N=3)
SAP016A
(N=3)
SAP019A
(N=3)
SAP099B
(N=6)
SAP105A
(N=3)
SAP108C
(N=3)
VRSAp
(N=3)
Cassette
(N=21)
pETB
(N=21)
pKH13
(N=21)
pKH21
(N=21)
pLNU1
(N=21)
pLNU3
(N=21)
pLNU9
(N=21)
pLUG10
(N=21)
pN315
(N=21)
pRJ9
(N=21)
pS0385
(N=21)
pSaa6159
(N=21)
pSAS
(N=21)
pSE12228p05
(N=21)
pSHaeA
(N=21)
pSJH101
(N=21)
pSK1
(N=21)
pSK41
(N=21)
pSSAP2
(N=21)
pSSP1
(N=21)
pSSP2
(N=21)
pTW20
(N=21)
pvSw4
(N=21)
SAP016A
(N=21)
SAP019A
(N=21)
SAP099B
(N=42)
SAP105A
(N=21)
SAP108C
(N=21)
VRSAp
(N=21)
Cassette
(N=1)
pETB
(N=1)
pKH13
(N=1)
pKH21
(N=1)
pLNU1
(N=1)
pLNU3
(N=1)
pLNU9
(N=1)
pLUG10
(N=1)
pN315
(N=1)
pRJ9
(N=1)
pS0385
(N=1)
pSaa6159
(N=1)
pSAS
(N=1)
pSE12228p05
(N=1)
pSHaeA
(N=1)
pSJH101
(N=1)
pSK1
(N=1)
pSK41
(N=1)
pSSAP2
(N=1)
pSSP1
(N=1)
pSSP2
(N=1)
pTW20
(N=1)
pvSw4
(N=1)
SAP016A
(N=1)
SAP019A
(N=1)
SAP099B
(N=2)
SAP105A
(N=1)
SAP108C
(N=1)
VRSAp
(N=1)
Cassette
(N=2)
pETB
(N=2)
pKH13
(N=2)
pKH21
(N=2)
pLNU1
(N=2)
pLNU3
(N=2)
pLNU9
(N=2)
pLUG10
(N=2)
pN315
(N=2)
pRJ9
(N=2)
pS0385
(N=2)
pSaa6159
(N=2)
pSAS
(N=2)
pSE12228p05
(N=2)
pSHaeA
(N=2)
pSJH101
(N=2)
pSK1
(N=2)
pSK41
(N=2)
pSSAP2
(N=2)
pSSP1
(N=2)
pSSP2
(N=2)
pTW20
(N=2)
pvSw4
(N=2)
SAP016A
(N=2)
SAP019A
(N=2)
SAP099B
(N=4)
SAP105A
(N=2)
SAP108C
(N=2)
VRSAp
(N=2)
Cassette
(N=1)
pETB
(N=1)
pKH13
(N=1)
pKH21
(N=1)
pLNU1
(N=1)
pLNU3
(N=1)
pLNU9
(N=1)
pLUG10
(N=1)
pN315
(N=1)
pRJ9
(N=1)
pS0385
(N=1)
pSaa6159
(N=1)
pSAS
(N=1)
pSE12228p05
(N=1)
pSHaeA
(N=1)
pSJH101
(N=1)
pSK1
(N=1)
pSK41
(N=1)
pSSAP2
(N=1)
pSSP1
(N=1)
pSSP2
(N=1)
pTW20
(N=1)
pvSw4
(N=1)
SAP016A
(N=1)
SAP019A
(N=1)
SAP099B
(N=2)
SAP105A
(N=1)
SAP108C
(N=1)
VRSAp
(N=1)
Cassette
(N=19)
pETB
(N=19)
pKH13
(N=19)
pKH21
(N=19)
pLNU1
(N=19)
pLNU3
(N=19)
pLNU9
(N=19)
pLUG10
(N=19)
pN315
(N=19)
pRJ9
(N=19)
pS0385
(N=19)
pSaa6159
(N=19)
pSAS
(N=19)
pSE12228p05
(N=19)
pSHaeA
(N=19)
pSJH101
(N=19)
pSK1
(N=19)
pSK41
(N=19)
pSSAP2
(N=19)
pSSP1
(N=19)
pSSP2
(N=19)
pTW20
(N=19)
pvSw4
(N=19)
SAP016A
(N=19)
SAP019A
(N=19)
SAP099B
(N=38)
SAP105A
(N=19)
SAP108C
(N=19)
VRSAp
(N=19)
Cassette
(N=5)
pETB
(N=5)
pKH13
(N=5)
pKH21
(N=5)
pLNU1
(N=5)
pLNU3
(N=5)
pLNU9
(N=5)
pLUG10
(N=5)
pN315
(N=5)
pRJ9
(N=5)
pS0385
(N=5)
pSaa6159
(N=5)
pSAS
(N=5)
pSE12228p05
(N=5)
pSHaeA
(N=5)
pSJH101
(N=5)
pSK1
(N=5)
pSK41
(N=5)
pSSAP2
(N=5)
pSSP1
(N=5)
pSSP2
(N=5)
pTW20
(N=5)
pvSw4
(N=5)
SAP016A
(N=5)
SAP019A
(N=5)
SAP099B
(N=10)
SAP105A
(N=5)
SAP108C
(N=5)
VRSAp
(N=5)
Cassette
(N=12)
pETB
(N=12)
pKH13
(N=12)
pKH21
(N=12)
pLNU1
(N=12)
pLNU3
(N=12)
pLNU9
(N=12)
pLUG10
(N=12)
pN315
(N=12)
pRJ9
(N=12)
pS0385
(N=12)
pSaa6159
(N=12)
pSAS
(N=12)
pSE12228p05
(N=12)
pSHaeA
(N=12)
pSJH101
(N=12)
pSK1
(N=12)
pSK41
(N=12)
pSSAP2
(N=12)
pSSP1
(N=12)
pSSP2
(N=12)
pTW20
(N=12)
pvSw4
(N=12)
SAP016A
(N=12)
SAP019A
(N=12)
SAP099B
(N=24)
SAP105A
(N=12)
SAP108C
(N=12)
VRSAp
(N=12)
Cassette
(N=13)
pETB
(N=13)
pKH13
(N=13)
pKH21
(N=13)
pLNU1
(N=13)
pLNU3
(N=13)
pLNU9
(N=13)
pLUG10
(N=13)
pN315
(N=13)
pRJ9
(N=13)
pS0385
(N=13)
pSaa6159
(N=13)
pSAS
(N=13)
pSE12228p05
(N=13)
pSHaeA
(N=13)
pSJH101
(N=13)
pSK1
(N=13)
pSK41
(N=13)
pSSAP2
(N=13)
pSSP1
(N=13)
pSSP2
(N=13)
pTW20
(N=13)
pvSw4
(N=13)
SAP016A
(N=13)
SAP019A
(N=13)
SAP099B
(N=26)
SAP105A
(N=13)
SAP108C
(N=13)
VRSAp
(N=13)
Cassette
(N=83)
pETB
(N=83)
pKH13
(N=83)
pKH21
(N=83)
pLNU1
(N=83)
pLNU3
(N=83)
pLNU9
(N=83)
pLUG10
(N=83)
pN315
(N=83)
pRJ9
(N=83)
pS0385
(N=83)
pSaa6159
(N=83)
pSAS
(N=83)
pSE12228p05
(N=83)
pSHaeA
(N=83)
pSJH101
(N=83)
pSK1
(N=83)
pSK41
(N=83)
pSSAP2
(N=83)
pSSP1
(N=83)
pSSP2
(N=83)
pTW20
(N=83)
pvSw4
(N=83)
SAP016A
(N=83)
SAP019A
(N=83)
SAP099B
(N=166)
SAP105A
(N=83)
SAP108C
(N=83)
VRSAp
(N=83)
presence_gene
No 6 (100%) 6 (100%) 5 (83.3%) 6 (100%) 5 (83.3%) 6 (100%) 5 (83.3%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 5 (83.3%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 12 (100%) 6 (100%) 6 (100%) 6 (100%) 3 (100%) 3 (100%) 3 (100%) 3 (100%) 3 (100%) 3 (100%) 3 (100%) 3 (100%) 2 (66.7%) 1 (33.3%) 3 (100%) 3 (100%) 3 (100%) 3 (100%) 3 (100%) 2 (66.7%) 3 (100%) 3 (100%) 3 (100%) 3 (100%) 3 (100%) 3 (100%) 3 (100%) 3 (100%) 2 (66.7%) 6 (100%) 3 (100%) 3 (100%) 3 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 20 (95.2%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 42 (100%) 21 (100%) 21 (100%) 21 (100%) 0 (0%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 2 (100%) 1 (100%) 1 (100%) 1 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 1 (50.0%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 4 (100%) 1 (50.0%) 2 (100%) 1 (50.0%) 1 (100%) 1 (100%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 2 (100%) 1 (100%) 1 (100%) 1 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 16 (84.2%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 18 (94.7%) 18 (94.7%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 13 (68.4%) 19 (100%) 36 (94.7%) 16 (84.2%) 19 (100%) 19 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 4 (80.0%) 5 (100%) 5 (100%) 1 (20.0%) 5 (100%) 5 (100%) 5 (100%) 0 (0%) 5 (100%) 5 (100%) 10 (100%) 5 (100%) 5 (100%) 5 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 11 (91.7%) 8 (66.7%) 12 (100%) 12 (100%) 12 (100%) 11 (91.7%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 10 (83.3%) 12 (100%) 12 (100%) 24 (100%) 12 (100%) 12 (100%) 12 (100%) 13 (100%) 12 (92.3%) 13 (100%) 13 (100%) 13 (100%) 12 (92.3%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 12 (92.3%) 13 (100%) 13 (100%) 9 (69.2%) 13 (100%) 13 (100%) 7 (53.8%) 13 (100%) 10 (76.9%) 11 (84.6%) 12 (92.3%) 13 (100%) 13 (100%) 26 (100%) 13 (100%) 11 (84.6%) 13 (100%) 82 (98.8%) 82 (98.8%) 82 (98.8%) 82 (98.8%) 82 (98.8%) 82 (98.8%) 78 (94.0%) 82 (98.8%) 82 (98.8%) 81 (97.6%) 82 (98.8%) 79 (95.2%) 82 (98.8%) 81 (97.6%) 82 (98.8%) 74 (89.2%) 82 (98.8%) 82 (98.8%) 73 (88.0%) 82 (98.8%) 80 (96.4%) 81 (97.6%) 74 (89.2%) 77 (92.8%) 82 (98.8%) 164 (98.8%) 79 (95.2%) 81 (97.6%) 82 (98.8%)
Yes 0 (0%) 0 (0%) 1 (16.7%) 0 (0%) 1 (16.7%) 0 (0%) 1 (16.7%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (16.7%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (33.3%) 2 (66.7%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (33.3%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (33.3%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (4.8%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (100%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (50.0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (50.0%) 0 (0%) 1 (50.0%) 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 3 (15.8%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (5.3%) 1 (5.3%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 6 (31.6%) 0 (0%) 2 (5.3%) 3 (15.8%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (20.0%) 0 (0%) 0 (0%) 4 (80.0%) 0 (0%) 0 (0%) 0 (0%) 5 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (8.3%) 4 (33.3%) 0 (0%) 0 (0%) 0 (0%) 1 (8.3%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 2 (16.7%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (7.7%) 0 (0%) 0 (0%) 0 (0%) 1 (7.7%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (7.7%) 0 (0%) 0 (0%) 4 (30.8%) 0 (0%) 0 (0%) 6 (46.2%) 0 (0%) 3 (23.1%) 2 (15.4%) 1 (7.7%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 2 (15.4%) 0 (0%) 1 (1.2%) 1 (1.2%) 1 (1.2%) 1 (1.2%) 1 (1.2%) 1 (1.2%) 5 (6.0%) 1 (1.2%) 1 (1.2%) 2 (2.4%) 1 (1.2%) 4 (4.8%) 1 (1.2%) 2 (2.4%) 1 (1.2%) 9 (10.8%) 1 (1.2%) 1 (1.2%) 10 (12.0%) 1 (1.2%) 3 (3.6%) 2 (2.4%) 9 (10.8%) 6 (7.2%) 1 (1.2%) 2 (1.2%) 4 (4.8%) 2 (2.4%) 1 (1.2%)
# Chromogenes 

# No plasmids = or > isolates

# Haemolyticus

model_1_plasmids_sau <- lm(mic_sau ~ as.factor(`rep13_7_rep(pLNU9)`) ,data=mb_haemolyticus_plasmids)
summary(model_1_plasmids_sau)
## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep13_7_rep(pLNU9)`), data = mb_haemolyticus_plasmids)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.77250 -0.07458  0.10750  0.41250  0.72750 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        3.9325     0.1928  20.400 2.96e-11 ***
## as.factor(`rep13_7_rep(pLNU9)`)1   0.2042     0.4310   0.474    0.644    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6678 on 13 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.01697,    Adjusted R-squared:  -0.05865 
## F-statistic: 0.2244 on 1 and 13 DF,  p-value: 0.6436
model_1_plasmids_sub <- lm(mic_sub ~ as.factor(`rep13_7_rep(pLNU9)`),data=mb_haemolyticus_plasmids)
summary(model_1_plasmids_sub)
## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep13_7_rep(pLNU9)`), data = mb_haemolyticus_plasmids)
## 
## Residuals:
##          1          6         10         11         13         15 
## -1.360e-01  1.388e-17 -1.416e+00  6.940e-01  3.940e-01  4.640e-01 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        3.5760     0.3792   9.431 0.000705 ***
## as.factor(`rep13_7_rep(pLNU9)`)1  -0.3960     0.9288  -0.426 0.691799    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8479 on 4 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.04347,    Adjusted R-squared:  -0.1957 
## F-statistic: 0.1818 on 1 and 4 DF,  p-value: 0.6918
model_2_plasmids_sau <- lm(mic_sau ~ as.factor(`rep39_2_repA(SAP016A)`),data=mb_haemolyticus_plasmids)
summary(model_2_plasmids_sau)
## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep39_2_repA(SAP016A)`), data = mb_haemolyticus_plasmids)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.59778 -0.14722  0.07222  0.32278  0.80222 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           3.7578     0.2036  18.454 1.05e-10 ***
## as.factor(`rep39_2_repA(SAP016A)`)1   0.5389     0.3220   1.674    0.118    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6109 on 13 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.1773, Adjusted R-squared:  0.114 
## F-statistic: 2.801 on 1 and 13 DF,  p-value: 0.1181
model_2_plasmids_sub <- lm(mic_sub ~ as.factor(`rep39_2_repA(SAP016A)`),data=mb_haemolyticus_plasmids)
summary(model_2_plasmids_sub)
## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep39_2_repA(SAP016A)`), data = mb_haemolyticus_plasmids)
## 
## Residuals:
##     1     6    10    11    13    15 
## -0.19 -0.45 -1.23  0.64  0.58  0.65 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                           3.3900     0.4933   6.872  0.00235 **
## as.factor(`rep39_2_repA(SAP016A)`)1   0.2400     0.6976   0.344  0.74815   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8544 on 4 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.02874,    Adjusted R-squared:  -0.2141 
## F-statistic: 0.1184 on 1 and 4 DF,  p-value: 0.7481
model_4_plasmids_sau <- lm(mic_sau ~ as.factor(`rep19b_1_repA(SAP105A)`),data=mb_haemolyticus_plasmids)
summary(model_4_plasmids_sau)
## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep19b_1_repA(SAP105A)`), data = mb_haemolyticus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7517 -0.1308  0.1783  0.3133  0.7483 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            3.9117     0.1906  20.520 2.75e-11 ***
## as.factor(`rep19b_1_repA(SAP105A)`)1   0.3083     0.4263   0.723    0.482    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6603 on 13 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.03869,    Adjusted R-squared:  -0.03525 
## F-statistic: 0.5232 on 1 and 13 DF,  p-value: 0.4823
model_4_plasmids_sub <- lm(mic_sub ~ as.factor(`rep19b_1_repA(SAP105A)`),data=mb_haemolyticus_plasmids)
summary(model_4_plasmids_sub)
## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep19b_1_repA(SAP105A)`), data = mb_haemolyticus_plasmids)
## 
## Residuals:
##       1       6      10      11      13      15 
## -0.1133 -0.3733 -1.3067  0.8033  0.5033  0.4867 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                           3.46667    0.49959   6.939  0.00227 **
## as.factor(`rep19b_1_repA(SAP105A)`)1  0.08667    0.70653   0.123  0.90829   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8653 on 4 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.003748,   Adjusted R-squared:  -0.2453 
## F-statistic: 0.01505 on 1 and 4 DF,  p-value: 0.9083
# Succinus (psaa6159 associated)

model_5_plasmids_sau <- lm(mic_sau ~ as.factor(`rep16_3_rep(pSaa6159)`) ,data=mb_succinus_plasmids)
summary(model_5_plasmids_sau)
## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep16_3_rep(pSaa6159)`), data = mb_succinus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2325 -0.1462 -0.0300  0.3137  0.9575 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           3.1525     0.2120  14.870 1.22e-07 ***
## as.factor(`rep16_3_rep(pSaa6159)`)1  -0.8425     0.4060  -2.075   0.0678 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5996 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.3237, Adjusted R-squared:  0.2485 
## F-statistic: 4.307 on 1 and 9 DF,  p-value: 0.06778
Confint(model_5_plasmids_sau)
##                                     Estimate     2.5 %     97.5 %
## (Intercept)                           3.1525  2.672909 3.63209080
## as.factor(`rep16_3_rep(pSaa6159)`)1  -0.8425 -1.760846 0.07584647
model_5_plasmids_sub <- lm(mic_sub ~ as.factor(`rep16_3_rep(pSaa6159)`),data=mb_succinus_plasmids)
summary(model_5_plasmids_sub)
## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep16_3_rep(pSaa6159)`), data = mb_succinus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5025 -0.3713 -0.0300  0.7025  0.9875 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           3.1525     0.3144   10.03  3.5e-06 ***
## as.factor(`rep16_3_rep(pSaa6159)`)1  -0.8425     0.6020   -1.40    0.195    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8892 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1787, Adjusted R-squared:  0.08748 
## F-statistic: 1.959 on 1 and 9 DF,  p-value: 0.1952
Confint(model_5_plasmids_sub)
##                                     Estimate     2.5 %    97.5 %
## (Intercept)                           3.1525  2.441321 3.8636792
## as.factor(`rep16_3_rep(pSaa6159)`)1  -0.8425 -2.204305 0.5193046
model_6_plasmids_sau <- lm(mic_sau ~ as.factor(`rep19c_2_rep(pvSw4)`) ,data=mb_succinus_plasmids)
summary(model_6_plasmids_sau)
## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep19c_2_rep(pvSw4)`), data = mb_succinus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8456 -0.3706 -0.0500  0.3044  1.3444 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         2.7656     0.2097  13.188 3.43e-07 ***
## as.factor(`rep19c_2_rep(pvSw4)`)1   0.8644     0.4918   1.758    0.113    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6291 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2556, Adjusted R-squared:  0.1728 
## F-statistic:  3.09 on 1 and 9 DF,  p-value: 0.1127
Confint(model_6_plasmids_sau)
##                                    Estimate      2.5 %   97.5 %
## (Intercept)                       2.7655556  2.2911731 3.239938
## as.factor(`rep19c_2_rep(pvSw4)`)1 0.8644444 -0.2480809 1.976970
model_6_plasmids_sub <- lm(mic_sub ~ as.factor(`rep19c_2_rep(pvSw4)`),data=mb_succinus_plasmids)
summary(model_6_plasmids_sub)
## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep19c_2_rep(pvSw4)`), data = mb_succinus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2267 -0.6667 -0.3667  0.8517  1.2633 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         2.8767     0.3251   8.849  9.8e-06 ***
## as.factor(`rep19c_2_rep(pvSw4)`)1   0.2533     0.7624   0.332    0.747    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9752 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.01212,    Adjusted R-squared:  -0.09764 
## F-statistic: 0.1104 on 1 and 9 DF,  p-value: 0.7473
Confint(model_6_plasmids_sub)
##                                    Estimate     2.5 %   97.5 %
## (Intercept)                       2.8766667  2.141286 3.612047
## as.factor(`rep19c_2_rep(pvSw4)`)1 0.2533333 -1.471287 1.977953
# Xylosus & pseudoxylosus (pSSP2 associated)

model_7_plasmids_sau <- lm(mic_sau ~ as.factor(`rep16_2_CDS6(pSJH101)`) ,data=mb_xylosus_plasmids)
summary(model_7_plasmids_sau)
## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep16_2_CDS6(pSJH101)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3800 -0.2825 -0.1640  0.0900  1.0100 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           3.0800     0.1186  25.971 1.65e-14 ***
## as.factor(`rep16_2_CDS6(pSJH101)`)1  -0.0920     0.2250  -0.409    0.688    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4276 on 16 degrees of freedom
## Multiple R-squared:  0.01034,    Adjusted R-squared:  -0.05151 
## F-statistic: 0.1672 on 1 and 16 DF,  p-value: 0.6881
Confint(model_7_plasmids_sau)
##                                     Estimate      2.5 %    97.5 %
## (Intercept)                            3.080  2.8285895 3.3314105
## as.factor(`rep16_2_CDS6(pSJH101)`)1   -0.092 -0.5690178 0.3850178
model_7_plasmids_sub <- lm(mic_sub ~ as.factor(`rep16_2_CDS6(pSJH101)`),data=mb_xylosus_plasmids)
summary(model_7_plasmids_sub)
## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep16_2_CDS6(pSJH101)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3700 -0.3685 -0.1100  0.5680  1.0100 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           3.0800     0.1822  16.904 1.26e-11 ***
## as.factor(`rep16_2_CDS6(pSJH101)`)1  -0.0920     0.3457  -0.266    0.794    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.657 on 16 degrees of freedom
## Multiple R-squared:  0.004407,   Adjusted R-squared:  -0.05782 
## F-statistic: 0.07082 on 1 and 16 DF,  p-value: 0.7935
Confint(model_7_plasmids_sub)
##                                     Estimate      2.5 %    97.5 %
## (Intercept)                            3.080  2.6937382 3.4662618
## as.factor(`rep16_2_CDS6(pSJH101)`)1   -0.092 -0.8248802 0.6408802
model_8_plasmids_sau <- lm(mic_sau ~ as.factor(`rep20_14_repA(pSSAP2)`) ,data=mb_xylosus_plasmids)
summary(model_8_plasmids_sau)
## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep20_14_repA(pSSAP2)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4350 -0.2297 -0.1487  0.1634  0.9550 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           2.9538     0.1482  19.935 1.01e-12 ***
## as.factor(`rep20_14_repA(pSSAP2)`)1   0.1812     0.1988   0.912    0.375    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4191 on 16 degrees of freedom
## Multiple R-squared:  0.04939,    Adjusted R-squared:  -0.01002 
## F-statistic: 0.8313 on 1 and 16 DF,  p-value: 0.3754
Confint(model_8_plasmids_sau)
##                                     Estimate     2.5 %   97.5 %
## (Intercept)                          2.95375  2.639650 3.267850
## as.factor(`rep20_14_repA(pSSAP2)`)1  0.18125 -0.240159 0.602659
model_8_plasmids_sub <- lm(mic_sub ~ as.factor(`rep20_14_repA(pSSAP2)`),data=mb_xylosus_plasmids)
summary(model_8_plasmids_sub)
## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep20_14_repA(pSSAP2)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3250 -0.3378 -0.0650  0.5487  1.0550 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                          3.07875    0.23264   13.23 4.92e-10 ***
## as.factor(`rep20_14_repA(pSSAP2)`)1 -0.04375    0.31212   -0.14     0.89    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.658 on 16 degrees of freedom
## Multiple R-squared:  0.001226,   Adjusted R-squared:  -0.0612 
## F-statistic: 0.01965 on 1 and 16 DF,  p-value: 0.8903
Confint(model_8_plasmids_sub)
##                                     Estimate      2.5 %    97.5 %
## (Intercept)                          3.07875  2.5855752 3.5719248
## as.factor(`rep20_14_repA(pSSAP2)`)1 -0.04375 -0.7054135 0.6179135
model_8_plasmids_sau <- lm(mic_sau ~ as.factor(`rep19c_4_rep(pSSP2)`) ,data=mb_xylosus_plasmids)
summary(model_8_plasmids_sau)
## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep19c_4_rep(pSSP2)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48067 -0.27067 -0.14700  0.07683  0.99933 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         3.0907     0.1087  28.422 4.01e-15 ***
## as.factor(`rep19c_4_rep(pSSP2)`)1  -0.2173     0.2664  -0.816    0.427    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4212 on 16 degrees of freedom
## Multiple R-squared:  0.03995,    Adjusted R-squared:  -0.02006 
## F-statistic: 0.6657 on 1 and 16 DF,  p-value: 0.4265
Confint(model_8_plasmids_sau)
##                                     Estimate      2.5 %  97.5 %
## (Intercept)                        3.0906667  2.8601438 3.32119
## as.factor(`rep19c_4_rep(pSSP2)`)1 -0.2173333 -0.7819967 0.34733
model_8_plasmids_sub <- lm(mic_sub ~as.factor(`rep19c_4_rep(pSSP2)`),data=mb_xylosus_plasmids)
summary(model_8_plasmids_sub)
## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep19c_4_rep(pSSP2)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6640 -0.3915 -0.1140  0.5503  0.8660 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         3.2240     0.1346  23.951 5.84e-14 ***
## as.factor(`rep19c_4_rep(pSSP2)`)1  -1.0173     0.3297  -3.085  0.00709 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5213 on 16 degrees of freedom
## Multiple R-squared:  0.373,  Adjusted R-squared:  0.3339 
## F-statistic:  9.52 on 1 and 16 DF,  p-value: 0.007093
Confint(model_8_plasmids_sub)
##                                    Estimate     2.5 %     97.5 %
## (Intercept)                        3.224000  2.938644  3.5093559
## as.factor(`rep19c_4_rep(pSSP2)`)1 -1.017333 -1.716310 -0.3183569
model_9_plasmids_sau <- lm(mic_sau ~ as.factor(`rep20_3_rep(pTW20)`) ,data=mb_xylosus_plasmids)
summary(model_9_plasmids_sau)
## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep20_3_rep(pTW20)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4300 -0.2850 -0.1250  0.1275  1.0500 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        3.0400     0.1069  28.435 3.98e-15 ***
## as.factor(`rep20_3_rep(pTW20)`)1   0.1300     0.3207   0.405    0.691    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4276 on 16 degrees of freedom
## Multiple R-squared:  0.01016,    Adjusted R-squared:  -0.0517 
## F-statistic: 0.1643 on 1 and 16 DF,  p-value: 0.6906
Confint(model_9_plasmids_sau)
##                                  Estimate      2.5 %    97.5 %
## (Intercept)                          3.04  2.8133615 3.2666385
## as.factor(`rep20_3_rep(pTW20)`)1     0.13 -0.5499154 0.8099154
model_9_plasmids_sub <- lm(mic_sub ~as.factor(`rep20_3_rep(pTW20)`),data=mb_xylosus_plasmids)
summary(model_9_plasmids_sub)
## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep20_3_rep(pTW20)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -1.33  -0.33  -0.07   0.49   1.05 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        3.0400     0.1642  18.509 3.15e-12 ***
## as.factor(`rep20_3_rep(pTW20)`)1   0.1300     0.4927   0.264    0.795    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.657 on 16 degrees of freedom
## Multiple R-squared:  0.004332,   Adjusted R-squared:  -0.0579 
## F-statistic: 0.06961 on 1 and 16 DF,  p-value: 0.7953
Confint(model_9_plasmids_sub)
##                                  Estimate      2.5 %   97.5 %
## (Intercept)                          3.04  2.6918152 3.388185
## as.factor(`rep20_3_rep(pTW20)`)1     0.13 -0.9145543 1.174554
model_10_plasmids_sau <- lm(mic_sau ~ as.factor(`rep19c_2_rep(pvSw4)`) ,data=mb_xylosus_plasmids)
summary(model_10_plasmids_sau)
## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep19c_2_rep(pvSw4)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4700 -0.2567 -0.1267  0.1113  1.0483 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        3.04167    0.12396  24.538    4e-14 ***
## as.factor(`rep19c_2_rep(pvSw4)`)1  0.03833    0.21470   0.179    0.861    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4294 on 16 degrees of freedom
## Multiple R-squared:  0.001988,   Adjusted R-squared:  -0.06039 
## F-statistic: 0.03188 on 1 and 16 DF,  p-value: 0.8605
Confint(model_10_plasmids_sau)
##                                     Estimate      2.5 %    97.5 %
## (Intercept)                       3.04166667  2.7788886 3.3044447
## as.factor(`rep19c_2_rep(pvSw4)`)1 0.03833333 -0.4168116 0.4934783
model_10_plasmids_sub <- lm(mic_sub ~as.factor(`rep19c_2_rep(pvSw4)`),data=mb_xylosus_plasmids)
summary(model_10_plasmids_sub)
## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep19c_2_rep(pvSw4)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.33167 -0.36042 -0.09083  0.58333  1.04833 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        3.04167    0.18999  16.010 2.86e-11 ***
## as.factor(`rep19c_2_rep(pvSw4)`)1  0.03833    0.32907   0.116    0.909    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6581 on 16 degrees of freedom
## Multiple R-squared:  0.0008474,  Adjusted R-squared:  -0.0616 
## F-statistic: 0.01357 on 1 and 16 DF,  p-value: 0.9087
Confint(model_10_plasmids_sub)
##                                     Estimate      2.5 %    97.5 %
## (Intercept)                       3.04166667  2.6389147 3.4444186
## as.factor(`rep19c_2_rep(pvSw4)`)1 0.03833333 -0.6592536 0.7359202
model_11_plasmids_sau <- lm(mic_sau ~ as.factor(`rep7a_6_SAP108C001(SAP108C)`) ,data=mb_xylosus_plasmids)
summary(model_11_plasmids_sau)
## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep7a_6_SAP108C001(SAP108C)`), 
##     data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4725 -0.2975 -0.0475  0.0850  1.0075 
## 
## Coefficients:
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                 3.0825     0.1054  29.252 2.55e-15
## as.factor(`rep7a_6_SAP108C001(SAP108C)`)1  -0.2525     0.3161  -0.799    0.436
##                                              
## (Intercept)                               ***
## as.factor(`rep7a_6_SAP108C001(SAP108C)`)1    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4215 on 16 degrees of freedom
## Multiple R-squared:  0.03834,    Adjusted R-squared:  -0.02176 
## F-statistic: 0.638 on 1 and 16 DF,  p-value: 0.4361
Confint(model_11_plasmids_sau)
##                                           Estimate      2.5 %    97.5 %
## (Intercept)                                 3.0825  2.8591109 3.3058891
## as.factor(`rep7a_6_SAP108C001(SAP108C)`)1  -0.2525 -0.9226673 0.4176673
model_11_plasmids_sub <- lm(mic_sub ~as.factor(`rep7a_6_SAP108C001(SAP108C)`),data=mb_xylosus_plasmids)
summary(model_11_plasmids_sub)
## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep7a_6_SAP108C001(SAP108C)`), 
##     data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3100 -0.3875 -0.0500  0.4275  1.0700 
## 
## Coefficients:
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                 3.0200     0.1626  18.577 2.98e-12
## as.factor(`rep7a_6_SAP108C001(SAP108C)`)1   0.3100     0.4877   0.636    0.534
##                                              
## (Intercept)                               ***
## as.factor(`rep7a_6_SAP108C001(SAP108C)`)1    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6502 on 16 degrees of freedom
## Multiple R-squared:  0.02463,    Adjusted R-squared:  -0.03633 
## F-statistic: 0.4041 on 1 and 16 DF,  p-value: 0.534
Confint(model_11_plasmids_sub)
##                                           Estimate      2.5 %   97.5 %
## (Intercept)                                   3.02  2.6753829 3.364617
## as.factor(`rep7a_6_SAP108C001(SAP108C)`)1     0.31 -0.7238512 1.343851

4.2 Visualizations

# ---- amr genes ----


# Staphylococcus aureus

amr_plot_1 <- amr_sau %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(amr_sau$wgs_isolate_id,amr_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


# Staphylococcus chromogenes

amr_plot_2 <- amr_sch %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(amr_sch$wgs_isolate_id,amr_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

# Staphylococcus haemolyticus

amr_plot_3 <- amr_shaem %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(amr_shaem$wgs_isolate_id,amr_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

# Staphylococcus sciuri

amr_plot_4 <- amr_ssc %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(amr_ssc$wgs_isolate_id,amr_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Mammaliicoccus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

#Staphylococcus succinus

amr_plot_5 <- amr_ssuc %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(amr_ssuc$wgs_isolate_id,amr_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


# Staphylococcus xylosus/saprophytiucs

amr_plot_6 <- amr_sxyl_spxyl %>%
  ggplot(aes(x = gene_abbreviation, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

# Others

amr_plot_7 <- amr_other %>%
  ggplot(aes(x = gene_abbreviation, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "ARGs",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x = element_text(angle = 45, hjust = 1))


# Multipanel figure with different species

amr_plot <- ggarrange(amr_plot_1 + rremove("xlab")+ rremove("ylab"),amr_plot_2 + rremove("xlab")+ rremove("ylab"),amr_plot_3 + rremove("xlab")+ rremove("ylab"),amr_plot_4 + rremove("xlab")+ rremove("ylab"),amr_plot_5 + rremove("xlab")+ rremove("ylab"),amr_plot_6 + rremove("xlab")+ rremove("ylab"),amr_plot_7 + rremove("xlab")+ rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2.25))

annotate_figure(amr_plot,
                bottom = text_grob("Gene", color = "black",
                                   hjust = 0.75, x = 0.5),
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/amr.png",width = 20, height = 40, units = "cm")





# ---- plasmid genes ----


# Staphylococcus aureus

plasmid_plot_1 <- plasmid_sau %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(plasmid_sau$wgs_isolate_id,plasmid_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


# Staphylococcus chromogenes

plasmid_plot_2 <- plasmid_sch %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(plasmid_sch$wgs_isolate_id,plasmid_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

# Staphylococcus haemolyticus

plasmid_plot_3 <- plasmid_shaem %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(plasmid_shaem$wgs_isolate_id,plasmid_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

# Staphylococcus sciuri

plasmid_plot_4 <- plasmid_ssc %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(plasmid_ssc$wgs_isolate_id,plasmid_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Mammaliicoccus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

#Staphylococcus succinus

plasmid_plot_5 <- plasmid_ssuc %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(plasmid_ssuc$wgs_isolate_id,plasmid_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


# Staphylococcus xylosus/saprophytiucs

plasmid_plot_6 <- plasmid_sxyl_spxyl %>%
  ggplot(aes(x = gene_abbreviation, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

# Others

plasmid_plot_7 <- plasmid_other %>%
  ggplot(aes(x = gene_abbreviation, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Plasmids",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x = element_text(angle = 45, hjust = 1))


# Multipanel figure with different species

plasmid_plot <- ggarrange(plasmid_plot_1 + rremove("xlab")+ rremove("ylab"),plasmid_plot_2 + rremove("xlab")+ rremove("ylab"),plasmid_plot_3 + rremove("xlab")+ rremove("ylab"),plasmid_plot_4 + rremove("xlab")+ rremove("ylab"),plasmid_plot_5 + rremove("xlab")+ rremove("ylab"),plasmid_plot_6 + rremove("xlab")+ rremove("ylab"),plasmid_plot_7 + rremove("xlab")+ rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2.25))

annotate_figure(plasmid_plot,
                bottom = text_grob("Gene", color = "black",
                                   hjust = 0.75, x = 0.5),
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/plasmid.png",width = 20, height = 40, units = "cm")


amr_plasmid_plot <- ggarrange(
  amr_plot_1 + rremove("xlab") + rremove("ylab"),
  plasmid_plot_1 + rremove("xlab") + rremove("ylab") ,
  amr_plot_2 + rremove("xlab") + rremove("ylab"),
  plasmid_plot_2 + rremove("xlab") + rremove("ylab") ,
  amr_plot_3 + rremove("xlab") + rremove("ylab"),
  plasmid_plot_3 + rremove("xlab") + rremove("ylab") ,
  amr_plot_4 + rremove("xlab") + rremove("ylab"),
  plasmid_plot_4 + rremove("xlab") + rremove("ylab") ,
  amr_plot_5 + rremove("xlab") + rremove("ylab"),
  plasmid_plot_5 + rremove("xlab") + rremove("ylab") ,
  amr_plot_6 + rremove("xlab") + rremove("ylab"),
  plasmid_plot_6 + rremove("xlab") + rremove("ylab") ,
  amr_plot_7 + rremove("ylab"),
  plasmid_plot_7 + rremove("ylab") ,
  ncol = 2, nrow = 7, common.legend = TRUE, align = "v",
  labels = c("A1", "A2", "B1", "B2", "C1", "C2", "D1", "D2", "E1", "E2", "F1", "F2", "G1", "G2"),
  heights = c(1.25,4.5, 4.25, 1.75,3,3.5, 2.5)
)

annotate_figure(amr_plasmid_plot, left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/figure_11.pdf",width = 40, height = 40, units = "cm",dpi=600)
ggsave(plot = last_plot(),"./figures/figure_11.png",width = 40, height = 40, units = "cm",dpi=600)
ggsave(plot = last_plot(),"./figures/figure_11.jpeg",width = 40, height = 40, units = "cm",dpi=600)