1 Assembly process statistics

1.1 All contigs

# ---- Results ----

# ---- Quast descriptive stats ----

table1(~`# contigs (>= 0 bp)` + N50 + L50 + `Largest contig` + `Total length (>= 0 bp)`, data= quast_wgs)

	Overall (N=84)
# contigs (>= 0 bp)
Mean (SD)	120 (333)
Median [Min, Max]	45.0 [19.0, 2590]
N50
Mean (SD)	585000 (417000)
Median [Min, Max]	390000 [20900, 1640000]
L50
Mean (SD)	3.24 (4.36)
Median [Min, Max]	3.00 [1.00, 39.0]
Largest contig
Mean (SD)	883000 (367000)
Median [Min, Max]	836000 [77900, 1640000]
Total length (>= 0 bp)
Mean (SD)	2640000 (291000)
Median [Min, Max]	2660000 [1830000, 3470000]

table1(~`# contigs (>= 0 bp)` + N50 + L50 + `Largest contig` + `Total length (>= 0 bp)`, data= quast_wgs_filtered)

	Overall (N=84)
# contigs (>= 0 bp)
Mean (SD)	48.4 (137)
Median [Min, Max]	20.0 [7.00, 1100]
N50
Mean (SD)	585000 (417000)
Median [Min, Max]	390000 [20900, 1640000]
L50
Mean (SD)	3.24 (4.36)
Median [Min, Max]	3.00 [1.00, 39.0]
Largest contig
Mean (SD)	883000 (367000)
Median [Min, Max]	836000 [77900, 1640000]
Total length (>= 0 bp)
Mean (SD)	2610000 (276000)
Median [Min, Max]	2650000 [1830000, 3230000]

1.2 Contigs >500 bp (used for analysis)

# ---- Quast descriptive stats ----

table1(~`# contigs (>= 0 bp)` + N50 + L50 + `Largest contig` + `Total length (>= 0 bp)`, data= quast_wgs_filtered)

	Overall (N=84)
# contigs (>= 0 bp)
Mean (SD)	48.4 (137)
Median [Min, Max]	20.0 [7.00, 1100]
N50
Mean (SD)	585000 (417000)
Median [Min, Max]	390000 [20900, 1640000]
L50
Mean (SD)	3.24 (4.36)
Median [Min, Max]	3.00 [1.00, 39.0]
Largest contig
Mean (SD)	883000 (367000)
Median [Min, Max]	836000 [77900, 1640000]
Total length (>= 0 bp)
Mean (SD)	2610000 (276000)
Median [Min, Max]	2650000 [1830000, 3230000]

2 AMPs genes

# Presence of AMP related genes

# Prevalence AMPs

table1(~mic_sau + mic_sub+sau_inhibition_category+sub_inhibition_category+case_control+aip_1+aip_2+aip_3+aip_4+sactipeptides+subtilosin_a+putative_bacteriocin_193+putative_bacteriocin_194|wgs_species_3,data=subset(metadata_bagel,metadata_bagel$wgs_species_3!="SAU"))

	SCH (N=21)	SCO (N=1)	SDEV (N=2)	SEQ (N=1)	SHAEM (N=19)	SPXYL (N=5)	MSC (N=6)	SSUC (N=12)	SXYL (N=13)	Overall (N=80)
mic_sau
Mean (SD)	3.32 (0.497)	4.59 (NA)	4.29 (0.297)	NA (NA)	3.97 (0.649)	3.06 (0.522)	3.33 (0.560)	2.92 (0.692)	3.05 (0.394)	3.37 (0.676)
Median [Min, Max]	3.25 [2.59, 4.51]	4.59 [4.59, 4.59]	4.29 [4.08, 4.50]	NA [NA, NA]	4.14 [2.16, 4.66]	3.01 [2.61, 3.93]	3.49 [2.60, 4.03]	3.03 [1.92, 4.11]	2.90 [2.71, 4.09]	3.24 [1.92, 4.66]
Missing	2 (9.5%)	0 (0%)	0 (0%)	1 (100%)	4 (21.1%)	0 (0%)	0 (0%)	1 (8.3%)	0 (0%)	8 (10.0%)
mic_sub
Mean (SD)	3.06 (0.604)	NA (NA)	3.29 (0.297)	NA (NA)	3.51 (0.775)	3.06 (0.522)	3.49 (0.480)	2.92 (0.931)	3.05 (0.698)	3.13 (0.687)
Median [Min, Max]	3.00 [2.51, 5.18]	NA [NA, NA]	3.29 [3.08, 3.50]	NA [NA, NA]	3.71 [2.16, 4.27]	3.01 [2.61, 3.93]	3.59 [2.60, 4.03]	2.58 [1.65, 4.14]	2.93 [1.71, 4.09]	3.10 [1.65, 5.18]
Missing	2 (9.5%)	1 (100%)	0 (0%)	1 (100%)	13 (68.4%)	0 (0%)	0 (0%)	1 (8.3%)	0 (0%)	18 (22.5%)
sau_inhibition_category
11-20	2 (9.5%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (20.0%)	1 (16.7%)	0 (0%)	6 (46.2%)	10 (12.5%)
21-30	2 (9.5%)	0 (0%)	0 (0%)	0 (0%)	1 (5.3%)	2 (40.0%)	0 (0%)	3 (25.0%)	2 (15.4%)	10 (12.5%)
31-40	6 (28.6%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (16.7%)	0 (0%)	3 (23.1%)	10 (12.5%)
41-50	4 (19.0%)	0 (0%)	0 (0%)	0 (0%)	1 (5.3%)	0 (0%)	2 (33.3%)	2 (16.7%)	1 (7.7%)	10 (12.5%)
51-60	1 (4.8%)	0 (0%)	1 (50.0%)	0 (0%)	4 (21.1%)	1 (20.0%)	1 (16.7%)	1 (8.3%)	1 (7.7%)	10 (12.5%)
61-70	2 (9.5%)	0 (0%)	1 (50.0%)	0 (0%)	7 (36.8%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	10 (12.5%)
Top 10	2 (9.5%)	0 (0%)	0 (0%)	0 (0%)	1 (5.3%)	1 (20.0%)	1 (16.7%)	5 (41.7%)	0 (0%)	10 (12.5%)
>70	0 (0%)	1 (100%)	0 (0%)	0 (0%)	1 (5.3%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	2 (2.5%)
Undetermined MIC	0 (0%)	0 (0%)	0 (0%)	1 (100%)	4 (21.1%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	5 (6.3%)
Missing	2 (9.5%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (8.3%)	0 (0%)	3 (3.8%)
sub_inhibition_category
>70	1 (4.8%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (1.3%)
11-20	5 (23.8%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	2 (40.0%)	1 (16.7%)	1 (8.3%)	1 (7.7%)	10 (12.5%)
21-30	3 (14.3%)	0 (0%)	1 (50.0%)	0 (0%)	0 (0%)	2 (40.0%)	0 (0%)	0 (0%)	4 (30.8%)	10 (12.5%)
31-40	5 (23.8%)	0 (0%)	0 (0%)	0 (0%)	1 (5.3%)	0 (0%)	0 (0%)	1 (8.3%)	2 (15.4%)	9 (11.3%)
41-50	3 (14.3%)	0 (0%)	1 (50.0%)	0 (0%)	1 (5.3%)	0 (0%)	3 (50.0%)	1 (8.3%)	0 (0%)	9 (11.3%)
Top 10	2 (9.5%)	0 (0%)	0 (0%)	0 (0%)	1 (5.3%)	0 (0%)	0 (0%)	5 (41.7%)	2 (15.4%)	10 (12.5%)
Undetermined MIC	0 (0%)	1 (100%)	0 (0%)	1 (100%)	13 (68.4%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	15 (18.8%)
51-60	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (5.3%)	1 (20.0%)	2 (33.3%)	1 (8.3%)	3 (23.1%)	8 (10.0%)
61-70	0 (0%)	0 (0%)	0 (0%)	0 (0%)	2 (10.5%)	0 (0%)	0 (0%)	2 (16.7%)	1 (7.7%)	5 (6.3%)
Missing	2 (9.5%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (8.3%)	0 (0%)	3 (3.8%)
case_control
Control	12 (57.1%)	0 (0%)	2 (100%)	1 (100%)	10 (52.6%)	2 (40.0%)	3 (50.0%)	6 (50.0%)	6 (46.2%)	42 (52.5%)
Case	9 (42.9%)	1 (100%)	0 (0%)	0 (0%)	9 (47.4%)	3 (60.0%)	3 (50.0%)	6 (50.0%)	7 (53.8%)	38 (47.5%)
aip_1
0	5 (23.8%)	1 (100%)	2 (100%)	1 (100%)	0 (0%)	5 (100%)	3 (50.0%)	12 (100%)	13 (100%)	42 (52.5%)
1	16 (76.2%)	0 (0%)	0 (0%)	0 (0%)	19 (100%)	0 (0%)	3 (50.0%)	0 (0%)	0 (0%)	38 (47.5%)
aip_2
0	16 (76.2%)	0 (0%)	2 (100%)	1 (100%)	19 (100%)	0 (0%)	6 (100%)	0 (0%)	6 (46.2%)	50 (62.5%)
1	5 (23.8%)	1 (100%)	0 (0%)	0 (0%)	0 (0%)	5 (100%)	0 (0%)	12 (100%)	7 (53.8%)	30 (37.5%)
aip_3
0	21 (100%)	1 (100%)	2 (100%)	1 (100%)	19 (100%)	5 (100%)	6 (100%)	12 (100%)	8 (61.5%)	75 (93.8%)
1	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	5 (38.5%)	5 (6.3%)
aip_4
0	21 (100%)	1 (100%)	0 (0%)	1 (100%)	19 (100%)	5 (100%)	6 (100%)	12 (100%)	12 (92.3%)	77 (96.3%)
1	0 (0%)	0 (0%)	2 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (7.7%)	3 (3.8%)
sactipeptides
Mean (SD)	1.00 (0)	1.00 (NA)	1.00 (0)	1.00 (NA)	1.00 (0)	1.00 (0)	1.00 (0)	1.00 (0)	1.00 (0)	1.00 (0)
Median [Min, Max]	1.00 [1.00, 1.00]	1.00 [1.00, 1.00]	1.00 [1.00, 1.00]	1.00 [1.00, 1.00]	1.00 [1.00, 1.00]	1.00 [1.00, 1.00]	1.00 [1.00, 1.00]	1.00 [1.00, 1.00]	1.00 [1.00, 1.00]	1.00 [1.00, 1.00]
subtilosin_a
1	13 (61.9%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	13 (16.3%)
0	8 (38.1%)	1 (100%)	2 (100%)	1 (100%)	19 (100%)	5 (100%)	6 (100%)	12 (100%)	13 (100%)	67 (83.8%)
putative_bacteriocin_193
0	21 (100%)	1 (100%)	2 (100%)	1 (100%)	19 (100%)	5 (100%)	2 (33.3%)	12 (100%)	13 (100%)	76 (95.0%)
1	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	4 (66.7%)	0 (0%)	0 (0%)	4 (5.0%)
putative_bacteriocin_194
0	21 (100%)	1 (100%)	2 (100%)	0 (0%)	19 (100%)	5 (100%)	6 (100%)	12 (100%)	6 (46.2%)	72 (90.0%)
1	0 (0%)	0 (0%)	0 (0%)	1 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	7 (53.8%)	8 (10.0%)

# AIP1

table(metadata_bagel$wgs_species_2,metadata_bagel$aip_1)

##                                       
##                                         0  1
##   Other                                 4  0
##   Staphylococcus chromogenes            5 16
##   Staphylococcus haemolyticus           0 19
##   Staphylococcus sciuri                 3  3
##   Staphylococcus succinus              12  0
##   Staphylococcus xylosus/pseudoxylosus 18  0

fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$aip_1))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$aip_1)
## p-value = 1.063e-15
## alternative hypothesis: two.sided

# AIP2

table(metadata_bagel$wgs_species_2,metadata_bagel$aip_2)

##                                       
##                                         0  1
##   Other                                 3  1
##   Staphylococcus chromogenes           16  5
##   Staphylococcus haemolyticus          19  0
##   Staphylococcus sciuri                 6  0
##   Staphylococcus succinus               0 12
##   Staphylococcus xylosus/pseudoxylosus  6 12

fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$aip_2))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$aip_2)
## p-value = 2.758e-10
## alternative hypothesis: two.sided

# AIP3

table(metadata_bagel$wgs_species_2,metadata_bagel$aip_3)

##                                       
##                                         0  1
##   Other                                 4  0
##   Staphylococcus chromogenes           21  0
##   Staphylococcus haemolyticus          19  0
##   Staphylococcus sciuri                 6  0
##   Staphylococcus succinus              12  0
##   Staphylococcus xylosus/pseudoxylosus 13  5

fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$aip_3))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$aip_3)
## p-value = 0.007501
## alternative hypothesis: two.sided

# AIP4

table(metadata_bagel$wgs_species_2,metadata_bagel$aip_4)

##                                       
##                                         0  1
##   Other                                 2  2
##   Staphylococcus chromogenes           21  0
##   Staphylococcus haemolyticus          19  0
##   Staphylococcus sciuri                 6  0
##   Staphylococcus succinus              12  0
##   Staphylococcus xylosus/pseudoxylosus 17  1

fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$aip_4))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$aip_4)
## p-value = 0.003651
## alternative hypothesis: two.sided

# Subtilosin A

table(metadata_bagel$wgs_species_2,metadata_bagel$subtilosin_a)

##                                       
##                                         1  0
##   Other                                 0  4
##   Staphylococcus chromogenes           13  8
##   Staphylococcus haemolyticus           0 19
##   Staphylococcus sciuri                 0  6
##   Staphylococcus succinus               0 12
##   Staphylococcus xylosus/pseudoxylosus  0 18

fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$subtilosin_a))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$subtilosin_a)
## p-value = 4.126e-08
## alternative hypothesis: two.sided

# Putative bacteriocin 193

table(metadata_bagel$wgs_species_2,metadata_bagel$putative_bacteriocin_193)

##                                       
##                                         0  1
##   Other                                 4  0
##   Staphylococcus chromogenes           21  0
##   Staphylococcus haemolyticus          19  0
##   Staphylococcus sciuri                 2  4
##   Staphylococcus succinus              12  0
##   Staphylococcus xylosus/pseudoxylosus 18  0

fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$putative_bacteriocin_193))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$putative_bacteriocin_193)
## p-value = 1.012e-05
## alternative hypothesis: two.sided

# Putative bacteriocin 194

table(metadata_bagel$wgs_species_2,metadata_bagel$putative_bacteriocin_194)

##                                       
##                                         0  1
##   Other                                 3  1
##   Staphylococcus chromogenes           21  0
##   Staphylococcus haemolyticus          19  0
##   Staphylococcus sciuri                 6  0
##   Staphylococcus succinus              12  0
##   Staphylococcus xylosus/pseudoxylosus 11  7

fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$putative_bacteriocin_194))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$putative_bacteriocin_194)
## p-value = 0.0002158
## alternative hypothesis: two.sided

2.1 SCH

# --- Inhibitory activity SCH ----

# AIP1

table1(~aip_1|case_control,data=mb_chromogenes)

	Control (N=12)	Case (N=9)	Overall (N=21)
aip_1
0	1 (8.3%)	4 (44.4%)	5 (23.8%)
1	11 (91.7%)	5 (55.6%)	16 (76.2%)

model_1 <- glm(aip_1 ~ case_control,family=binomial(link="logit"),data=mb_chromogenes)
summary(model_1)

## 
## Call:
## glm(formula = aip_1 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_chromogenes)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)  
## (Intercept)         2.398      1.044   2.296   0.0217 *
## case_controlCase   -2.175      1.241  -1.752   0.0798 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 23.053  on 20  degrees of freedom
## Residual deviance: 19.249  on 19  degrees of freedom
## AIC: 23.249
## 
## Number of Fisher Scoring iterations: 5

exp(Confint(model_1))

##                    Estimate       2.5 %     97.5 %
## (Intercept)      11.0000000 2.140159987 201.083018
## case_controlCase  0.1136364 0.005045515   1.010237

effectsize::oddsratio_to_riskratio(model_1)

## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.92) - make sure your intercept
##   is meaningful.

## CIs are back-transformed from the logit scale.

## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.92 |             
## case control [Case] |       0.61 | [0.06, 1.00]

## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.

emmeans(model_1,~case_control,type="response")

##  case_control  prob     SE  df asymp.LCL asymp.UCL
##  Control      0.917 0.0798 Inf     0.587     0.988
##  Case         0.556 0.1656 Inf     0.251     0.823
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale

# AIP2

table1(~aip_2|case_control,data=mb_chromogenes)

	Control (N=12)	Case (N=9)	Overall (N=21)
aip_2
0	11 (91.7%)	5 (55.6%)	16 (76.2%)
1	1 (8.3%)	4 (44.4%)	5 (23.8%)

model_2 <- glm(aip_2 ~ case_control,family=binomial(link="logit"),data=mb_chromogenes)
summary(model_2)

## 
## Call:
## glm(formula = aip_2 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_chromogenes)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)  
## (Intercept)        -2.398      1.044  -2.296   0.0217 *
## case_controlCase    2.175      1.241   1.752   0.0798 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 23.053  on 20  degrees of freedom
## Residual deviance: 19.249  on 19  degrees of freedom
## AIC: 23.249
## 
## Number of Fisher Scoring iterations: 5

exp(Confint(model_2))

##                    Estimate      2.5 %      97.5 %
## (Intercept)      0.09090909 0.00497307   0.4672548
## case_controlCase 8.80000000 0.98986680 198.1958191

effectsize::oddsratio_to_riskratio(model_2)

## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.08) - make sure your intercept
##   is meaningful.

## CIs are back-transformed from the logit scale.

## Parameter           | Risk Ratio |        95% CI
## ------------------------------------------------
## (Intercept)         |       0.08 |              
## case control [Case] |       5.33 | [0.99, 11.37]

## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.

emmeans(model_2,~case_control,type="response")

##  case_control   prob     SE  df asymp.LCL asymp.UCL
##  Control      0.0833 0.0798 Inf    0.0116     0.413
##  Case         0.4444 0.1656 Inf    0.1768     0.749
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale

# Subtilosin A

table1(~subtilosin_a|case_control,data=mb_chromogenes)

	Control (N=12)	Case (N=9)	Overall (N=21)
subtilosin_a
1	7 (58.3%)	6 (66.7%)	13 (61.9%)
0	5 (41.7%)	3 (33.3%)	8 (38.1%)

model_3 <- glm(subtilosin_a ~ case_control,family=binomial(link="logit"),data=mb_chromogenes)
summary(model_3)

## 
## Call:
## glm(formula = subtilosin_a ~ case_control, family = binomial(link = "logit"), 
##     data = mb_chromogenes)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)       -0.3365     0.5855  -0.575    0.566
## case_controlCase  -0.3567     0.9181  -0.389    0.698
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 27.910  on 20  degrees of freedom
## Residual deviance: 27.758  on 19  degrees of freedom
## AIC: 31.758
## 
## Number of Fisher Scoring iterations: 4

exp(Confint(model_3))

##                   Estimate     2.5 %   97.5 %
## (Intercept)      0.7142857 0.2113989 2.237832
## case_controlCase 0.7000000 0.1053801 4.183982

effectsize::oddsratio_to_riskratio(model_3)

## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.42) - make sure your intercept
##   is meaningful.

## CIs are back-transformed from the logit scale.

## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.42 |             
## case control [Case] |       0.80 | [0.17, 1.80]

## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.

emmeans(model_3,~case_control,type="response")

##  case_control  prob    SE  df asymp.LCL asymp.UCL
##  Control      0.417 0.142 Inf     0.185     0.692
##  Case         0.333 0.157 Inf     0.111     0.667
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale

# Association between AMPs and MIC in SCH

# AIP1

model_1 <- lm(mic_sau ~ aip_1,data=mb_chromogenes)
summary(model_1)

## 
## Call:
## lm(formula = mic_sau ~ aip_1, data = mb_chromogenes)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7647 -0.3097 -0.0300  0.2353  1.1553 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.1900     0.2533  12.594 4.79e-10 ***
## aip_11        0.1647     0.2851   0.578    0.571    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5066 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01925,    Adjusted R-squared:  -0.03844 
## F-statistic: 0.3337 on 1 and 17 DF,  p-value: 0.5711

model_1 <- lm(mic_sub ~ aip_1,data=mb_chromogenes)
summary(model_1)

## 
## Call:
## lm(formula = mic_sub ~ aip_1, data = mb_chromogenes)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.578 -0.383 -0.088  0.206  2.092 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.9400     0.3093   9.507 3.23e-08 ***
## aip_11        0.1480     0.3481   0.425    0.676    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6185 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01052,    Adjusted R-squared:  -0.04768 
## F-statistic: 0.1808 on 1 and 17 DF,  p-value: 0.676

# AIP2

model_2 <- lm(mic_sau ~ aip_2,data=mb_chromogenes)
summary(model_2)

## 
## Call:
## lm(formula = mic_sau ~ aip_2, data = mb_chromogenes)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7647 -0.3097 -0.0300  0.2353  1.1553 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.3547     0.1308  25.647 4.97e-15 ***
## aip_21       -0.1647     0.2851  -0.578    0.571    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5066 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01925,    Adjusted R-squared:  -0.03844 
## F-statistic: 0.3337 on 1 and 17 DF,  p-value: 0.5711

model_2 <- lm(mic_sub ~ aip_2,data=mb_chromogenes)
summary(model_2)

## 
## Call:
## lm(formula = mic_sub ~ aip_2, data = mb_chromogenes)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.578 -0.383 -0.088  0.206  2.092 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.0880     0.1597  19.336 5.19e-13 ***
## aip_21       -0.1480     0.3481  -0.425    0.676    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6185 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01052,    Adjusted R-squared:  -0.04768 
## F-statistic: 0.1808 on 1 and 17 DF,  p-value: 0.676

# Subtilosin A

model_3 <- lm(mic_sau ~ subtilosin_a,data=mb_chromogenes)
summary(model_3)

## 
## Call:
## lm(formula = mic_sau ~ subtilosin_a, data = mb_chromogenes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84273 -0.33886  0.07727  0.20614  1.07727 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     3.4327     0.1484  23.137 2.73e-14 ***
## subtilosin_a0  -0.2677     0.2286  -1.171    0.258    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4921 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.07463,    Adjusted R-squared:  0.0202 
## F-statistic: 1.371 on 1 and 17 DF,  p-value: 0.2578

model_3 <- lm(mic_sub ~ subtilosin_a,data=mb_chromogenes)
summary(model_3)

## 
## Call:
## lm(formula = mic_sub ~ subtilosin_a, data = mb_chromogenes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55909 -0.41455 -0.06909  0.18545  2.11091 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.06909    0.18742  16.375 7.64e-12 ***
## subtilosin_a0 -0.02909    0.28884  -0.101    0.921    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6216 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.0005964,  Adjusted R-squared:  -0.05819 
## F-statistic: 0.01014 on 1 and 17 DF,  p-value: 0.921

# Clades within this group


# List of target isolates

target_isolates <- c("SCH1", "SCH3", "SCH5", "SCH8", "SCH15", "SCH16", "SCH17", "SCH18", "SCH20")

# Create a new column 'presence' based on target isolates
mb_chromogenes <- mb_chromogenes %>%
  mutate(clade = ifelse(wgs_isolate_id %in% target_isolates, "Present", "Absent"))


# List of target isolates
target_isolates_2 <- c("SCH2", "SCH11", "SCH21", "SCH9")

# Create a new column 'presence' based on target isolates
mb_chromogenes <- mb_chromogenes %>%
  mutate(clade_2 = ifelse(wgs_isolate_id %in% target_isolates_2, "Present", "Absent"))


table1(~mic_sau + mic_sub+sau_inhibition_category+sub_inhibition_category+subtilosin_a|clade,data=subset(mb_chromogenes,mb_chromogenes$wgs_species_3!="SAU"))

	Absent (N=12)	Present (N=9)	Overall (N=21)
mic_sau
Mean (SD)	3.30 (0.409)	3.35 (0.629)	3.32 (0.497)
Median [Min, Max]	3.25 [2.74, 4.18]	3.37 [2.59, 4.51]	3.25 [2.59, 4.51]
Missing	1 (8.3%)	1 (11.1%)	2 (9.5%)
mic_sub
Mean (SD)	3.21 (0.712)	2.85 (0.362)	3.06 (0.604)
Median [Min, Max]	3.16 [2.61, 5.18]	2.71 [2.51, 3.51]	3.00 [2.51, 5.18]
Missing	1 (8.3%)	1 (11.1%)	2 (9.5%)
sau_inhibition_category
11 to 20	2 (16.7%)	0 (0%)	2 (9.5%)
21 to 30	1 (8.3%)	1 (11.1%)	2 (9.5%)
31 to 40	5 (41.7%)	1 (11.1%)	6 (28.6%)
41 to 50	2 (16.7%)	2 (22.2%)	4 (19.0%)
61 to 70	1 (8.3%)	1 (11.1%)	2 (9.5%)
51 to 60	0 (0%)	1 (11.1%)	1 (4.8%)
Top 10	0 (0%)	2 (22.2%)	2 (9.5%)
Missing	1 (8.3%)	1 (11.1%)	2 (9.5%)
sub_inhibition_category
>70	1 (8.3%)	0 (0%)	1 (4.8%)
11 to 20	3 (25.0%)	2 (22.2%)	5 (23.8%)
21 to 30	1 (8.3%)	2 (22.2%)	3 (14.3%)
31 to 40	4 (33.3%)	1 (11.1%)	5 (23.8%)
41 to 50	2 (16.7%)	1 (11.1%)	3 (14.3%)
Top 10	0 (0%)	2 (22.2%)	2 (9.5%)
Missing	1 (8.3%)	1 (11.1%)	2 (9.5%)
subtilosin_a
1	4 (33.3%)	9 (100%)	13 (61.9%)
0	8 (66.7%)	0 (0%)	8 (38.1%)

# Isolates inside and outside clade

model <- lm(mic_sau~clade,data=mb_chromogenes)
summary(model)

## 
## Call:
## lm(formula = mic_sau ~ clade, data = mb_chromogenes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76125 -0.27926 -0.04727  0.26074  1.15875 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.29727    0.15400  21.411  9.8e-14 ***
## cladePresent  0.05398    0.23733   0.227    0.823    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5108 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.003034,   Adjusted R-squared:  -0.05561 
## F-statistic: 0.05173 on 1 and 17 DF,  p-value: 0.8228

Confint(model)

##                Estimate      2.5 %    97.5 %
## (Intercept)  3.29727273  2.9723606 3.6221848
## cladePresent 0.05397727 -0.4467459 0.5547004

emmeans(model,~clade)

##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.30 0.154 17     2.97     3.62
##  Present   3.35 0.181 17     2.97     3.73
## 
## Confidence level used: 0.95

model <- lm(mic_sub~clade,data=mb_chromogenes)
summary(model)

## 
## Call:
## lm(formula = mic_sub ~ clade, data = mb_chromogenes)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5964 -0.3162 -0.1013  0.1112  1.9736 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.2064     0.1790  17.917 1.79e-12 ***
## cladePresent  -0.3551     0.2758  -1.288    0.215    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5935 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.08886,    Adjusted R-squared:  0.03527 
## F-statistic: 1.658 on 1 and 17 DF,  p-value: 0.2151

Confint(model)

##                Estimate      2.5 %    97.5 %
## (Intercept)   3.2063636  2.8288051 3.5839221
## cladePresent -0.3551136 -0.9369704 0.2267431

emmeans(model,~clade)

##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.21 0.179 17     2.83     3.58
##  Present   2.85 0.210 17     2.41     3.29
## 
## Confidence level used: 0.95

# AIP1

table1(~aip_1|clade,data=mb_chromogenes)

	Absent (N=12)	Present (N=9)	Overall (N=21)
aip_1
0	5 (41.7%)	0 (0%)	5 (23.8%)
1	7 (58.3%)	9 (100%)	16 (76.2%)

fisher.test(table(mb_chromogenes$aip_1,mb_chromogenes$clade))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$aip_1, mb_chromogenes$clade)
## p-value = 0.04511
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.8206881       Inf
## sample estimates:
## odds ratio 
##        Inf

# AIP2

table1(~aip_2|clade,data=mb_chromogenes)

	Absent (N=12)	Present (N=9)	Overall (N=21)
aip_2
0	7 (58.3%)	9 (100%)	16 (76.2%)
1	5 (41.7%)	0 (0%)	5 (23.8%)

fisher.test(table(mb_chromogenes$aip_2,mb_chromogenes$clade))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$aip_2, mb_chromogenes$clade)
## p-value = 0.04511
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.00000 1.21849
## sample estimates:
## odds ratio 
##          0

# Subtilosin A

table1(~subtilosin_a|clade,data=mb_chromogenes)

	Absent (N=12)	Present (N=9)	Overall (N=21)
subtilosin_a
1	4 (33.3%)	9 (100%)	13 (61.9%)
0	8 (66.7%)	0 (0%)	8 (38.1%)

fisher.test(table(mb_chromogenes$subtilosin_a,mb_chromogenes$clade))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$subtilosin_a, mb_chromogenes$clade)
## p-value = 0.0046
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.0000000 0.4517167
## sample estimates:
## odds ratio 
##          0

# Isolates inside and outside clade 2

model <- lm(mic_sau~clade_2,data=mb_chromogenes)
summary(model)

## 
## Call:
## lm(formula = mic_sau ~ clade_2, data = mb_chromogenes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.75813 -0.30313 -0.06813  0.24188  1.16187 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      3.3481     0.1267  26.420 3.03e-15 ***
## clade_2Present  -0.1781     0.3189  -0.559    0.584    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5069 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01802,    Adjusted R-squared:  -0.03974 
## F-statistic: 0.3119 on 1 and 17 DF,  p-value: 0.5838

Confint(model)

##                 Estimate      2.5 %    97.5 %
## (Intercept)     3.348125  3.0807545 3.6154955
## clade_2Present -0.178125 -0.8509926 0.4947426

emmeans(model,~clade_2)

##  clade_2 emmean    SE df lower.CL upper.CL
##  Absent    3.35 0.127 17     3.08     3.62
##  Present   3.17 0.293 17     2.55     3.79
## 
## Confidence level used: 0.95

model <- lm(mic_sub~clade_2,data=mb_chromogenes)
summary(model)

## 
## Call:
## lm(formula = mic_sub ~ clade_2, data = mb_chromogenes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.58813 -0.39312 -0.09667  0.16687  2.08187 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      3.0981     0.1534  20.197 2.55e-13 ***
## clade_2Present  -0.2615     0.3860  -0.677    0.507    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6136 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.02628,    Adjusted R-squared:  -0.031 
## F-statistic: 0.4587 on 1 and 17 DF,  p-value: 0.5073

Confint(model)

##                  Estimate     2.5 %    97.5 %
## (Intercept)     3.0981250  2.774496 3.4217537
## clade_2Present -0.2614583 -1.075906 0.5529893

emmeans(model,~clade_2)

##  clade_2 emmean    SE df lower.CL upper.CL
##  Absent    3.10 0.153 17     2.77     3.42
##  Present   2.84 0.354 17     2.09     3.58
## 
## Confidence level used: 0.95

# AIP1

table1(~aip_1|clade_2,data=mb_chromogenes)

	Absent (N=17)	Present (N=4)	Overall (N=21)
aip_1
0	1 (5.9%)	4 (100%)	5 (23.8%)
1	16 (94.1%)	0 (0%)	16 (76.2%)

fisher.test(table(mb_chromogenes$aip_1,mb_chromogenes$clade_2))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$aip_1, mb_chromogenes$clade_2)
## p-value = 0.0008354
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.0000000 0.2697048
## sample estimates:
## odds ratio 
##          0

# AIP2

table1(~aip_2|clade_2,data=mb_chromogenes)

	Absent (N=17)	Present (N=4)	Overall (N=21)
aip_2
0	16 (94.1%)	0 (0%)	16 (76.2%)
1	1 (5.9%)	4 (100%)	5 (23.8%)

fisher.test(table(mb_chromogenes$aip_2,mb_chromogenes$clade_2))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$aip_2, mb_chromogenes$clade_2)
## p-value = 0.0008354
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  3.707758      Inf
## sample estimates:
## odds ratio 
##        Inf

# Subtilosin A

table1(~subtilosin_a|clade_2,data=mb_chromogenes)

	Absent (N=17)	Present (N=4)	Overall (N=21)
subtilosin_a
1	10 (58.8%)	3 (75.0%)	13 (61.9%)
0	7 (41.2%)	1 (25.0%)	8 (38.1%)

fisher.test(table(mb_chromogenes$subtilosin_a,mb_chromogenes$clade_2))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$subtilosin_a, mb_chromogenes$clade_2)
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.007958673 7.734743347
## sample estimates:
## odds ratio 
##  0.4921903

2.2 SHAEM

# --- AMPs in SHAEM----

# none of the AMPs were both present and absent in at least 2 isolates

# Inhibitory activity was absent in most isolates

2.3 SSC

# ---- AMPs in SSC ----

# AIP1

table1(~aip_1|case_control,data=mb_sciuri)

	Control (N=3)	Case (N=3)	Overall (N=6)
aip_1
0	1 (33.3%)	2 (66.7%)	3 (50.0%)
1	2 (66.7%)	1 (33.3%)	3 (50.0%)

model_1 <- glm(aip_1 ~ case_control,family=binomial(link="logit"),data=mb_sciuri)
summary(model_1)

## 
## Call:
## glm(formula = aip_1 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_sciuri)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)        0.6931     1.2247   0.566    0.571
## case_controlCase  -1.3863     1.7321  -0.800    0.423
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 8.3178  on 5  degrees of freedom
## Residual deviance: 7.6382  on 4  degrees of freedom
## AIC: 11.638
## 
## Number of Fisher Scoring iterations: 4

exp(Confint(model_1))

##                  Estimate       2.5 %    97.5 %
## (Intercept)          2.00 0.191564939 43.037669
## case_controlCase     0.25 0.004803826  6.562236

effectsize::oddsratio_to_riskratio(model_1)

## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.67) - make sure your intercept
##   is meaningful.

## CIs are back-transformed from the logit scale.

## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.67 |             
## case control [Case] |       0.50 | [0.01, 1.39]

## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.

emmeans(model_1,~case_control,type="response")

##  case_control  prob    SE  df asymp.LCL asymp.UCL
##  Control      0.667 0.272 Inf    0.1535     0.957
##  Case         0.333 0.272 Inf    0.0434     0.846
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale

# Putative bacteriocin 193

table1(~putative_bacteriocin_193|case_control,data=mb_sciuri)

	Control (N=3)	Case (N=3)	Overall (N=6)
putative_bacteriocin_193
0	1 (33.3%)	1 (33.3%)	2 (33.3%)
1	2 (66.7%)	2 (66.7%)	4 (66.7%)

model_2 <- glm(putative_bacteriocin_193 ~ case_control,family=binomial(link="logit"),data=mb_sciuri)
summary(model_2)

## 
## Call:
## glm(formula = putative_bacteriocin_193 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_sciuri)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)
## (Intercept)      6.931e-01  1.225e+00   0.566    0.571
## case_controlCase 1.923e-16  1.732e+00   0.000    1.000
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 7.6382  on 5  degrees of freedom
## Residual deviance: 7.6382  on 4  degrees of freedom
## AIC: 11.638
## 
## Number of Fisher Scoring iterations: 4

exp(Confint(model_2))

##                  Estimate      2.5 %   97.5 %
## (Intercept)             2 0.19156494 43.03767
## case_controlCase        1 0.02475536 40.39530

effectsize::oddsratio_to_riskratio(model_2)

## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.67) - make sure your intercept
##   is meaningful.

## CIs are back-transformed from the logit scale.

## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.67 |             
## case control [Case] |       1.00 | [0.07, 1.48]

## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.

emmeans(model_2,~case_control,type="response")

##  case_control  prob    SE  df asymp.LCL asymp.UCL
##  Control      0.667 0.272 Inf     0.154     0.957
##  Case         0.667 0.272 Inf     0.154     0.957
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale

# Association between AMPs and MIC in SSCI

# AIP1

model_1 <- lm(mic_sau ~ aip_1,data=mb_sciuri)
summary(model_1)

## 
## Call:
## lm(formula = mic_sau ~ aip_1, data = mb_sciuri)
## 
## Residuals:
##     1     2     3     4     5     6 
## -0.09  0.10  0.92 -0.41 -0.01 -0.51 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.5400     0.3282  10.785 0.000419 ***
## aip_11       -0.4300     0.4642  -0.926 0.406692    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5685 on 4 degrees of freedom
## Multiple R-squared:  0.1766, Adjusted R-squared:  -0.0292 
## F-statistic: 0.8581 on 1 and 4 DF,  p-value: 0.4067

model_1 <- lm(mic_sub ~ aip_1,data=mb_sciuri)
summary(model_1)

## 
## Call:
## lm(formula = mic_sub ~ aip_1, data = mb_sciuri)
## 
## Residuals:
##       1       2       3       4       5       6 
## -0.0900  0.1000  0.5867  0.2567 -0.0100 -0.8433 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.54000    0.30815  11.488 0.000328 ***
## aip_11      -0.09667    0.43579  -0.222 0.835318    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5337 on 4 degrees of freedom
## Multiple R-squared:  0.01215,    Adjusted R-squared:  -0.2348 
## F-statistic: 0.0492 on 1 and 4 DF,  p-value: 0.8353

# putative bacteriocin 193

model_2 <- lm(mic_sau ~ putative_bacteriocin_193,data=mb_sciuri)
summary(model_2)

## 
## Call:
## lm(formula = mic_sau ~ putative_bacteriocin_193, data = mb_sciuri)
## 
## Residuals:
##      1      2      3      4      5      6 
##  0.145  0.335  0.665 -0.665  0.225 -0.705 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                 3.3650     0.4423   7.607   0.0016 **
## putative_bacteriocin_1931  -0.0600     0.5418  -0.111   0.9171   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6256 on 4 degrees of freedom
## Multiple R-squared:  0.003057,   Adjusted R-squared:  -0.2462 
## F-statistic: 0.01227 on 1 and 4 DF,  p-value: 0.9171

model_2 <- lm(mic_sub ~ putative_bacteriocin_193,data=mb_sciuri)
summary(model_2)

## 
## Call:
## lm(formula = mic_sub ~ putative_bacteriocin_193, data = mb_sciuri)
## 
## Residuals:
##      1      2      3      4      5      6 
##  0.145  0.335  0.165 -0.165  0.225 -0.705 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 3.8650     0.3032  12.748 0.000218 ***
## putative_bacteriocin_1931  -0.5600     0.3713  -1.508 0.206010    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4288 on 4 degrees of freedom
## Multiple R-squared:  0.3625, Adjusted R-squared:  0.2031 
## F-statistic: 2.274 on 1 and 4 DF,  p-value: 0.206

# clades

# List of target isolates
target_isolates <- c("SSCI1", "SSCI2", "SSCI5", "SSCI6")


# Create a new column 'presence' based on target isolates
mb_sciuri <- mb_sciuri %>%
  mutate(clade = ifelse(wgs_isolate_id %in% target_isolates, "Present", "Absent"))


table1(~mic_sau + mic_sub+sau_inhibition_category+sub_inhibition_category+case_control+aip_1+aip_2+aip_3+aip_4+sactipeptides+subtilosin_a+putative_bacteriocin_193+putative_bacteriocin_194|clade,data=mb_sciuri)

	Absent (N=2)	Present (N=4)	Overall (N=6)
mic_sau
Mean (SD)	3.37 (0.940)	3.31 (0.476)	3.33 (0.560)
Median [Min, Max]	3.37 [2.70, 4.03]	3.49 [2.60, 3.64]	3.49 [2.60, 4.03]
mic_sub
Mean (SD)	3.87 (0.233)	3.31 (0.476)	3.49 (0.480)
Median [Min, Max]	3.87 [3.70, 4.03]	3.49 [2.60, 3.64]	3.59 [2.60, 4.03]
sau_inhibition_category
11 to 20	1 (50.0%)	0 (0%)	1 (16.7%)
51 to 60	1 (50.0%)	0 (0%)	1 (16.7%)
31 to 40	0 (0%)	1 (25.0%)	1 (16.7%)
41 to 50	0 (0%)	2 (50.0%)	2 (33.3%)
Top 10	0 (0%)	1 (25.0%)	1 (16.7%)
sub_inhibition_category
51 to 60	2 (100%)	0 (0%)	2 (33.3%)
11 to 20	0 (0%)	1 (25.0%)	1 (16.7%)
41 to 50	0 (0%)	3 (75.0%)	3 (50.0%)
case_control
Control	1 (50.0%)	2 (50.0%)	3 (50.0%)
Case	1 (50.0%)	2 (50.0%)	3 (50.0%)
aip_1
0	0 (0%)	3 (75.0%)	3 (50.0%)
1	2 (100%)	1 (25.0%)	3 (50.0%)
aip_2
0	2 (100%)	4 (100%)	6 (100%)
1	0 (0%)	0 (0%)	0 (0%)
aip_3
0	2 (100%)	4 (100%)	6 (100%)
1	0 (0%)	0 (0%)	0 (0%)
aip_4
0	2 (100%)	4 (100%)	6 (100%)
1	0 (0%)	0 (0%)	0 (0%)
sactipeptides
Mean (SD)	1.00 (0)	1.00 (0)	1.00 (0)
Median [Min, Max]	1.00 [1.00, 1.00]	1.00 [1.00, 1.00]	1.00 [1.00, 1.00]
subtilosin_a
1	0 (0%)	0 (0%)	0 (0%)
0	2 (100%)	4 (100%)	6 (100%)
putative_bacteriocin_193
0	2 (100%)	0 (0%)	2 (33.3%)
1	0 (0%)	4 (100%)	4 (66.7%)
putative_bacteriocin_194
0	2 (100%)	4 (100%)	6 (100%)
1	0 (0%)	0 (0%)	0 (0%)

# MIC across different clades

model <- lm(mic_sau~clade,data=mb_sciuri)
summary(model)

## 
## Call:
## lm(formula = mic_sau ~ clade, data = mb_sciuri)
## 
## Residuals:
##      1      2      3      4      5      6 
##  0.145  0.335  0.665 -0.665  0.225 -0.705 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    3.3650     0.4423   7.607   0.0016 **
## cladePresent  -0.0600     0.5418  -0.111   0.9171   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6256 on 4 degrees of freedom
## Multiple R-squared:  0.003057,   Adjusted R-squared:  -0.2462 
## F-statistic: 0.01227 on 1 and 4 DF,  p-value: 0.9171

Confint(model)

##              Estimate     2.5 %   97.5 %
## (Intercept)     3.365  2.136854 4.593146
## cladePresent   -0.060 -1.564165 1.444165

emmeans(model,~clade)

##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.37 0.442  4     2.14     4.59
##  Present   3.31 0.313  4     2.44     4.17
## 
## Confidence level used: 0.95

model <- lm(mic_sub~clade,data=mb_sciuri)
summary(model)

## 
## Call:
## lm(formula = mic_sub ~ clade, data = mb_sciuri)
## 
## Residuals:
##      1      2      3      4      5      6 
##  0.145  0.335  0.165 -0.165  0.225 -0.705 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.8650     0.3032  12.748 0.000218 ***
## cladePresent  -0.5600     0.3713  -1.508 0.206010    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4288 on 4 degrees of freedom
## Multiple R-squared:  0.3625, Adjusted R-squared:  0.2031 
## F-statistic: 2.274 on 1 and 4 DF,  p-value: 0.206

Confint(model)

##              Estimate     2.5 %   97.5 %
## (Intercept)     3.865  3.023234 4.706766
## cladePresent   -0.560 -1.590948 0.470948

emmeans(model,~clade)

##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.87 0.303  4     3.02     4.71
##  Present   3.31 0.214  4     2.71     3.90
## 
## Confidence level used: 0.95

# AIP1

table1(~aip_1|clade,data=mb_sciuri)

	Absent (N=2)	Present (N=4)	Overall (N=6)
aip_1
0	0 (0%)	3 (75.0%)	3 (50.0%)
1	2 (100%)	1 (25.0%)	3 (50.0%)

fisher.test(table(mb_sciuri$aip_1,mb_sciuri$clade))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_sciuri$aip_1, mb_sciuri$clade)
## p-value = 0.4
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.000000 4.922984
## sample estimates:
## odds ratio 
##          0

# putative bacteriocin 193

table1(~putative_bacteriocin_193|clade,data=mb_sciuri)

	Absent (N=2)	Present (N=4)	Overall (N=6)
putative_bacteriocin_193
0	2 (100%)	0 (0%)	2 (33.3%)
1	0 (0%)	4 (100%)	4 (66.7%)

fisher.test(table(mb_sciuri$putative_bacteriocin_193,mb_sciuri$clade))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_sciuri$putative_bacteriocin_193, mb_sciuri$clade)
## p-value = 0.06667
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.5079839       Inf
## sample estimates:
## odds ratio 
##        Inf

2.4 SSUC

# ---- AMPs in SSUC ----

# none of the AMPs were both present and absent in at least 2 isolates

# Clades

# List of target isolates
target_isolates <- c("SSUC1", "SSUC2", "SSUC3", "SSUC4", "SSUC5", "SSUC10", "SSUC12")


# Create a new column 'presence' based on target isolates
mb_succinus <- mb_succinus %>%
  mutate(clade = ifelse(wgs_isolate_id %in% target_isolates, "Present", "Absent"))


table1(~mic_sau + mic_sub+sau_inhibition_category+sub_inhibition_category+subtilosin_a|clade,data=subset(mb_succinus,mb_succinus$wgs_species_3!="SAU"))

	Absent (N=5)	Present (N=7)	Overall (N=12)
mic_sau
Mean (SD)	3.50 (0.443)	2.44 (0.430)	2.92 (0.692)
Median [Min, Max]	3.58 [3.03, 4.11]	2.40 [1.92, 3.14]	3.03 [1.92, 4.11]
Missing	0 (0%)	1 (14.3%)	1 (8.3%)
mic_sub
Mean (SD)	3.50 (0.649)	2.44 (0.884)	2.92 (0.931)
Median [Min, Max]	3.68 [2.58, 4.11]	2.21 [1.65, 4.14]	2.58 [1.65, 4.14]
Missing	0 (0%)	1 (14.3%)	1 (8.3%)
sau_inhibition_category
21 to 30	2 (40.0%)	1 (14.3%)	3 (25.0%)
41 to 50	2 (40.0%)	0 (0%)	2 (16.7%)
51 to 60	1 (20.0%)	0 (0%)	1 (8.3%)
Top 10	0 (0%)	5 (71.4%)	5 (41.7%)
Missing	0 (0%)	1 (14.3%)	1 (8.3%)
sub_inhibition_category
11 to 20	1 (20.0%)	0 (0%)	1 (8.3%)
31 to 40	1 (20.0%)	0 (0%)	1 (8.3%)
41 to 50	1 (20.0%)	0 (0%)	1 (8.3%)
51 to 60	1 (20.0%)	0 (0%)	1 (8.3%)
61 to 70	1 (20.0%)	1 (14.3%)	2 (16.7%)
Top 10	0 (0%)	5 (71.4%)	5 (41.7%)
Missing	0 (0%)	1 (14.3%)	1 (8.3%)
subtilosin_a
1	0 (0%)	0 (0%)	0 (0%)
0	5 (100%)	7 (100%)	12 (100%)

# MIC across different clades

model <- lm(mic_sau~clade,data=mb_succinus)
summary(model)

## 
## Call:
## lm(formula = mic_sau ~ clade, data = mb_succinus)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.520 -0.346  0.070  0.194  0.700 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.5020     0.1949  17.971 2.33e-08 ***
## cladePresent  -1.0620     0.2639  -4.025    0.003 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4357 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6429, Adjusted R-squared:  0.6032 
## F-statistic:  16.2 on 1 and 9 DF,  p-value: 0.002996

Confint(model)

##              Estimate     2.5 %     97.5 %
## (Intercept)     3.502  3.061169  3.9428312
## cladePresent   -1.062 -1.658888 -0.4651118

emmeans(model,~clade)

##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.50 0.195  9     3.06     3.94
##  Present   2.44 0.178  9     2.04     2.84
## 
## Confidence level used: 0.95

model <- lm(mic_sub~clade,data=mb_succinus)
summary(model)

## 
## Call:
## lm(formula = mic_sub ~ clade, data = mb_succinus)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.922 -0.456 -0.160  0.353  1.700 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.5020     0.3524   9.937 3.77e-06 ***
## cladePresent  -1.0620     0.4772  -2.226   0.0531 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.788 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.355,  Adjusted R-squared:  0.2833 
## F-statistic: 4.953 on 1 and 9 DF,  p-value: 0.05308

Confint(model)

##              Estimate     2.5 %     97.5 %
## (Intercept)     3.502  2.704779 4.29922133
## cladePresent   -1.062 -2.141443 0.01744278

emmeans(model,~clade)

##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.50 0.352  9     2.70     4.30
##  Present   2.44 0.322  9     1.71     3.17
## 
## Confidence level used: 0.95

2.5 SXYL & SPXYL

# ---- AMPs in SXYL/PXYL ----

# AIP2

table1(~aip_2|case_control,data=mb_xylosus)

	Control (N=8)	Case (N=10)	Overall (N=18)
aip_2
0	2 (25.0%)	4 (40.0%)	6 (33.3%)
1	6 (75.0%)	6 (60.0%)	12 (66.7%)

model_1 <- glm(aip_2 ~ case_control,family=binomial(link="logit"),data=mb_xylosus)
summary(model_1)

## 
## Call:
## glm(formula = aip_2 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_xylosus)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)        1.0986     0.8165   1.346    0.178
## case_controlCase  -0.6931     1.0408  -0.666    0.505
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 22.915  on 17  degrees of freedom
## Residual deviance: 22.458  on 16  degrees of freedom
## AIC: 26.458
## 
## Number of Fisher Scoring iterations: 4

exp(Confint(model_1))

##                  Estimate      2.5 %    97.5 %
## (Intercept)           3.0 0.69120194 20.475683
## case_controlCase      0.5 0.05327731  3.663896

effectsize::oddsratio_to_riskratio(model_1)

## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.75) - make sure your intercept
##   is meaningful.

## CIs are back-transformed from the logit scale.

## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.75 |             
## case control [Case] |       0.80 | [0.18, 1.22]

## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.

emmeans(model_1,~case_control,type="response")

##  case_control prob    SE  df asymp.LCL asymp.UCL
##  Control      0.75 0.153 Inf     0.377     0.937
##  Case         0.60 0.155 Inf     0.297     0.842
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale

# AIP3

table1(~aip_3|case_control,data=mb_xylosus)

	Control (N=8)	Case (N=10)	Overall (N=18)
aip_3
0	6 (75.0%)	7 (70.0%)	13 (72.2%)
1	2 (25.0%)	3 (30.0%)	5 (27.8%)

model_2 <- glm(aip_3 ~ case_control,family=binomial(link="logit"),data=mb_xylosus)
summary(model_2)

## 
## Call:
## glm(formula = aip_3 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_xylosus)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)       -1.0986     0.8165  -1.346    0.178
## case_controlCase   0.2513     1.0690   0.235    0.814
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 21.270  on 17  degrees of freedom
## Residual deviance: 21.215  on 16  degrees of freedom
## AIC: 25.215
## 
## Number of Fisher Scoring iterations: 4

exp(Confint(model_2))

##                   Estimate      2.5 %    97.5 %
## (Intercept)      0.3333333 0.04883842  1.446755
## case_controlCase 1.2857143 0.15782429 12.403957

effectsize::oddsratio_to_riskratio(model_2)

## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.25) - make sure your intercept
##   is meaningful.

## CIs are back-transformed from the logit scale.

## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.25 |             
## case control [Case] |       1.20 | [0.20, 3.22]

## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.

emmeans(model_2,~case_control,type="response")

##  case_control prob    SE  df asymp.LCL asymp.UCL
##  Control      0.25 0.153 Inf    0.0630     0.623
##  Case         0.30 0.145 Inf    0.0998     0.624
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale

# putative bacteriocin 194

table1(~putative_bacteriocin_194|case_control,data=mb_xylosus)

	Control (N=8)	Case (N=10)	Overall (N=18)
putative_bacteriocin_194
0	5 (62.5%)	6 (60.0%)	11 (61.1%)
1	3 (37.5%)	4 (40.0%)	7 (38.9%)

model_3 <- glm(putative_bacteriocin_194 ~ case_control,family=binomial(link="logit"),data=mb_xylosus)
summary(model_3)

## 
## Call:
## glm(formula = putative_bacteriocin_194 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_xylosus)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)       -0.5108     0.7303  -0.699    0.484
## case_controlCase   0.1054     0.9747   0.108    0.914
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 24.057  on 17  degrees of freedom
## Residual deviance: 24.045  on 16  degrees of freedom
## AIC: 28.045
## 
## Number of Fisher Scoring iterations: 4

exp(Confint(model_3))

##                  Estimate     2.5 %   97.5 %
## (Intercept)      0.600000 0.1230568 2.445302
## case_controlCase 1.111111 0.1616472 8.069622

effectsize::oddsratio_to_riskratio(model_3)

## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.38) - make sure your intercept
##   is meaningful.

## CIs are back-transformed from the logit scale.

## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.38 |             
## case control [Case] |       1.07 | [0.24, 2.21]

## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.

emmeans(model_3,~case_control,type="response")

##  case_control  prob    SE  df asymp.LCL asymp.UCL
##  Control      0.375 0.171 Inf     0.125     0.715
##  Case         0.400 0.155 Inf     0.158     0.703
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale

# Association between AMPs and MIC in SXYL

# AIP2

model_1 <- lm(mic_sau ~ aip_2,data=mb_xylosus)
summary(model_1)

## 
## Call:
## lm(formula = mic_sau ~ aip_2, data = mb_xylosus)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.43500 -0.25917 -0.13417  0.04438  0.94500 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.1450     0.1733   18.15 4.25e-12 ***
## aip_21       -0.1358     0.2122   -0.64    0.531    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4244 on 16 degrees of freedom
## Multiple R-squared:  0.02497,    Adjusted R-squared:  -0.03597 
## F-statistic: 0.4097 on 1 and 16 DF,  p-value: 0.5312

model_1 <- lm(mic_sub ~ aip_2,data=mb_xylosus)
summary(model_1)

## 
## Call:
## lm(formula = mic_sub ~ aip_2, data = mb_xylosus)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.26833 -0.38250 -0.06542  0.56354  1.11167 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.9783     0.2678  11.122 6.13e-09 ***
## aip_21        0.1142     0.3280   0.348    0.732    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6559 on 16 degrees of freedom
## Multiple R-squared:  0.007517,   Adjusted R-squared:  -0.05451 
## F-statistic: 0.1212 on 1 and 16 DF,  p-value: 0.7323

# AIP3

model_1 <- lm(mic_sau ~ aip_3,data=mb_xylosus)
summary(model_1)

## 
## Call:
## lm(formula = mic_sau ~ aip_3, data = mb_xylosus)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.47800 -0.25308 -0.08808  0.04442  0.92692 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.0031     0.1167  25.734  1.9e-14 ***
## aip_31        0.1849     0.2214   0.835    0.416    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4208 on 16 degrees of freedom
## Multiple R-squared:  0.04177,    Adjusted R-squared:  -0.01811 
## F-statistic: 0.6975 on 1 and 16 DF,  p-value: 0.4159

model_1 <- lm(mic_sub ~ aip_3,data=mb_xylosus)
summary(model_1)

## 
## Call:
## lm(formula = mic_sub ~ aip_3, data = mb_xylosus)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2780 -0.3700 -0.1100  0.5705  1.1020 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.0800     0.1822  16.904 1.26e-11 ***
## aip_31       -0.0920     0.3457  -0.266    0.794    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.657 on 16 degrees of freedom
## Multiple R-squared:  0.004407,   Adjusted R-squared:  -0.05782 
## F-statistic: 0.07082 on 1 and 16 DF,  p-value: 0.7935

# putative bacteriocin 194

model_2 <- lm(mic_sau ~ putative_bacteriocin_194,data=mb_xylosus)
summary(model_2)

## 
## Call:
## lm(formula = mic_sau ~ putative_bacteriocin_194, data = mb_xylosus)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41091 -0.26591 -0.17403  0.06286  0.98286 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                3.02091    0.12890  23.436  8.2e-14 ***
## putative_bacteriocin_1941  0.08623    0.20670   0.417    0.682    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4275 on 16 degrees of freedom
## Multiple R-squared:  0.01076,    Adjusted R-squared:  -0.05107 
## F-statistic: 0.1741 on 1 and 16 DF,  p-value: 0.6821

model_2 <- lm(mic_sub ~ putative_bacteriocin_194,data=mb_xylosus)
summary(model_2)

## 
## Call:
## lm(formula = mic_sub ~ putative_bacteriocin_194, data = mb_xylosus)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.25429 -0.40182 -0.05805  0.56756  1.12571 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 3.1118     0.1972  15.782 3.56e-11 ***
## putative_bacteriocin_1941  -0.1475     0.3162  -0.467    0.647    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.654 on 16 degrees of freedom
## Multiple R-squared:  0.01342,    Adjusted R-squared:  -0.04824 
## F-statistic: 0.2177 on 1 and 16 DF,  p-value: 0.6471

# clades

target_isolates <- c("SXYL1", "SXYL2", "SXYL4", "SXYL10", "SXYL11", "SXYL13")

mb_xylosus <- mb_xylosus %>%
  mutate(clade = ifelse(wgs_isolate_id %in% target_isolates, "Present_xylosus", "Absent_xylosus"))


mb_xylosus$clade <- ifelse(mb_xylosus$wgs_species_3=="SXYL",mb_xylosus$clade,"Ps_xylosus")


table1(~mic_sau + mic_sub+sau_inhibition_category+sub_inhibition_category+case_control+aip_1+aip_2+aip_3+aip_4+sactipeptides+subtilosin_a+putative_bacteriocin_193+putative_bacteriocin_194|clade,data=mb_xylosus)

	Absent_xylosus (N=7)	Present_xylosus (N=6)	Ps_xylosus (N=5)	Overall (N=18)
mic_sau
Mean (SD)	2.98 (0.291)	3.14 (0.505)	3.06 (0.522)	3.05 (0.417)
Median [Min, Max]	2.85 [2.74, 3.56]	3.04 [2.71, 4.09]	3.01 [2.61, 3.93]	2.92 [2.61, 4.09]
mic_sub
Mean (SD)	3.12 (0.514)	2.97 (0.915)	3.06 (0.522)	3.05 (0.639)
Median [Min, Max]	2.93 [2.56, 3.85]	3.04 [1.71, 4.09]	3.01 [2.61, 3.93]	2.97 [1.71, 4.09]
sau_inhibition_category
11 to 20	4 (57.1%)	2 (33.3%)	1 (20.0%)	7 (38.9%)
21 to 30	1 (14.3%)	1 (16.7%)	2 (40.0%)	4 (22.2%)
31 to 40	1 (14.3%)	2 (33.3%)	0 (0%)	3 (16.7%)
41 to 50	1 (14.3%)	0 (0%)	0 (0%)	1 (5.6%)
51 to 60	0 (0%)	1 (16.7%)	1 (20.0%)	2 (11.1%)
Top 10	0 (0%)	0 (0%)	1 (20.0%)	1 (5.6%)
sub_inhibition_category
11 to 20	1 (14.3%)	0 (0%)	2 (40.0%)	3 (16.7%)
21 to 30	3 (42.9%)	1 (16.7%)	2 (40.0%)	6 (33.3%)
31 to 40	1 (14.3%)	1 (16.7%)	0 (0%)	2 (11.1%)
51 to 60	2 (28.6%)	1 (16.7%)	1 (20.0%)	4 (22.2%)
61 to 70	0 (0%)	1 (16.7%)	0 (0%)	1 (5.6%)
Top 10	0 (0%)	2 (33.3%)	0 (0%)	2 (11.1%)
case_control
Control	3 (42.9%)	3 (50.0%)	2 (40.0%)	8 (44.4%)
Case	4 (57.1%)	3 (50.0%)	3 (60.0%)	10 (55.6%)
aip_1
0	7 (100%)	6 (100%)	5 (100%)	18 (100%)
1	0 (0%)	0 (0%)	0 (0%)	0 (0%)
aip_2
0	2 (28.6%)	4 (66.7%)	0 (0%)	6 (33.3%)
1	5 (71.4%)	2 (33.3%)	5 (100%)	12 (66.7%)
aip_3
0	6 (85.7%)	2 (33.3%)	5 (100%)	13 (72.2%)
1	1 (14.3%)	4 (66.7%)	0 (0%)	5 (27.8%)
aip_4
0	6 (85.7%)	6 (100%)	5 (100%)	17 (94.4%)
1	1 (14.3%)	0 (0%)	0 (0%)	1 (5.6%)
sactipeptides
Mean (SD)	1.00 (0)	1.00 (0)	1.00 (0)	1.00 (0)
Median [Min, Max]	1.00 [1.00, 1.00]	1.00 [1.00, 1.00]	1.00 [1.00, 1.00]	1.00 [1.00, 1.00]
subtilosin_a
1	0 (0%)	0 (0%)	0 (0%)	0 (0%)
0	7 (100%)	6 (100%)	5 (100%)	18 (100%)
putative_bacteriocin_193
0	7 (100%)	6 (100%)	5 (100%)	18 (100%)
1	0 (0%)	0 (0%)	0 (0%)	0 (0%)
putative_bacteriocin_194
0	6 (85.7%)	0 (0%)	5 (100%)	11 (61.1%)
1	1 (14.3%)	6 (100%)	0 (0%)	7 (38.9%)

model <- lm(mic_sau~clade,data=mb_xylosus)
summary(model)

## 
## Call:
## lm(formula = mic_sau ~ clade, data = mb_xylosus)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.45200 -0.23810 -0.09029  0.03333  0.95333 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           2.97857    0.16547  18.001 1.44e-11 ***
## cladePresent_xylosus  0.15810    0.24356   0.649    0.526    
## cladePs_xylosus       0.08343    0.25634   0.325    0.749    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4378 on 15 degrees of freedom
## Multiple R-squared:  0.02745,    Adjusted R-squared:  -0.1022 
## F-statistic: 0.2117 on 2 and 15 DF,  p-value: 0.8116

Confint(model)

##                        Estimate      2.5 %    97.5 %
## (Intercept)          2.97857143  2.6258826 3.3312603
## cladePresent_xylosus 0.15809524 -0.3610487 0.6772391
## cladePs_xylosus      0.08342857 -0.4629546 0.6298118

emmeans(model,~clade)

##  clade           emmean    SE df lower.CL upper.CL
##  Absent_xylosus    2.98 0.165 15     2.63     3.33
##  Present_xylosus   3.14 0.179 15     2.76     3.52
##  Ps_xylosus        3.06 0.196 15     2.64     3.48
## 
## Confidence level used: 0.95

Anova(model,type="III")

## Anova Table (Type III tests)
## 
## Response: mic_sau
##             Sum Sq Df  F value    Pr(>F)    
## (Intercept) 62.103  1 324.0281 1.443e-11 ***
## clade        0.081  2   0.2117    0.8116    
## Residuals    2.875 15                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model <- lm(mic_sub~clade,data=mb_xylosus)
summary(model)

## 
## Call:
## lm(formula = mic_sub ~ clade, data = mb_xylosus)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2600 -0.3766 -0.0610  0.5589  1.1200 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           3.12143    0.25563  12.211 3.41e-09 ***
## cladePresent_xylosus -0.15143    0.37628  -0.402    0.693    
## cladePs_xylosus      -0.05943    0.39602  -0.150    0.883    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6763 on 15 degrees of freedom
## Multiple R-squared:  0.01074,    Adjusted R-squared:  -0.1212 
## F-statistic: 0.08141 on 2 and 15 DF,  p-value: 0.9222

Confint(model)

##                         Estimate      2.5 %    97.5 %
## (Intercept)           3.12142857  2.5765597 3.6662975
## cladePresent_xylosus -0.15142857 -0.9534539 0.6505967
## cladePs_xylosus      -0.05942857 -0.9035358 0.7846787

emmeans(model,~clade)

##  clade           emmean    SE df lower.CL upper.CL
##  Absent_xylosus    3.12 0.256 15     2.58     3.67
##  Present_xylosus   2.97 0.276 15     2.38     3.56
##  Ps_xylosus        3.06 0.302 15     2.42     3.71
## 
## Confidence level used: 0.95

Anova(model,type="III")

## Anova Table (Type III tests)
## 
## Response: mic_sub
##             Sum Sq Df  F value    Pr(>F)    
## (Intercept) 68.203  1 149.0984 3.407e-09 ***
## clade        0.074  2   0.0814    0.9222    
## Residuals    6.862 15                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# AIP2

table1(~aip_2|clade,data=mb_xylosus)

	Absent_xylosus (N=7)	Present_xylosus (N=6)	Ps_xylosus (N=5)	Overall (N=18)
aip_2
0	2 (28.6%)	4 (66.7%)	0 (0%)	6 (33.3%)
1	5 (71.4%)	2 (33.3%)	5 (100%)	12 (66.7%)

fisher.test(table(mb_xylosus$aip_2,mb_xylosus$clade))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_xylosus$aip_2, mb_xylosus$clade)
## p-value = 0.07428
## alternative hypothesis: two.sided

# AIP3

table1(~aip_3|clade,data=mb_xylosus)

	Absent_xylosus (N=7)	Present_xylosus (N=6)	Ps_xylosus (N=5)	Overall (N=18)
aip_3
0	6 (85.7%)	2 (33.3%)	5 (100%)	13 (72.2%)
1	1 (14.3%)	4 (66.7%)	0 (0%)	5 (27.8%)

fisher.test(table(mb_xylosus$aip_3,mb_xylosus$clade))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_xylosus$aip_3, mb_xylosus$clade)
## p-value = 0.03186
## alternative hypothesis: two.sided

# putative bacteriocin 194

table1(~putative_bacteriocin_194|clade,data=mb_xylosus)

	Absent_xylosus (N=7)	Present_xylosus (N=6)	Ps_xylosus (N=5)	Overall (N=18)
putative_bacteriocin_194
0	6 (85.7%)	0 (0%)	5 (100%)	11 (61.1%)
1	1 (14.3%)	6 (100%)	0 (0%)	7 (38.9%)

fisher.test(table(mb_xylosus$putative_bacteriocin_194,mb_xylosus$clade))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_xylosus$putative_bacteriocin_194, mb_xylosus$clade)
## p-value = 0.0004085
## alternative hypothesis: two.sided

3 Virulence genes

3.1 Summary

# Number of genes identified

virulence_annotated %>% group_by(vir_gene_type) %>% summarise(n=n_distinct(vir_gene_abbreviation))

## # A tibble: 6 × 2
##   vir_gene_type              n
##   <chr>                  <int>
## 1 Adherence                 37
## 2 Enterotoxins_exotoxins    87
## 3 Exoenzymes                22
## 4 Immune_evasion            34
## 5 Iron_uptake_metabolism    29
## 6 Toxin_genes               30

virulence_annotated %>% ungroup() %>% summarise(n=n_distinct(vir_gene_nonredundant))

## # A tibble: 1 × 1
##       n
##   <int>
## 1   200

virulence_annotated %>% group_by(vir_gene_type) %>% summarise(n=n_distinct(vir_gene_nonredundant))

## # A tibble: 6 × 2
##   vir_gene_type              n
##   <chr>                  <int>
## 1 Adherence                 30
## 2 Enterotoxins_exotoxins    59
## 3 Exoenzymes                18
## 4 Immune_evasion            34
## 5 Iron_uptake_metabolism    29
## 6 Toxin_genes               30

# Adherence genes 

adherence_summary <- adherence_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")

## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.

adherence_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/37)*100)

## # A tibble: 10 × 3
##    wgs_species_3 n_genes prop_present
##    <chr>           <dbl>        <dbl>
##  1 MSC                20         54.1
##  2 SAU                30         81.1
##  3 SCH                19         51.4
##  4 SCO                 8         21.6
##  5 SDEV               17         45.9
##  6 SEQ                14         37.8
##  7 SHAEM              21         56.8
##  8 SPXYL              17         45.9
##  9 SSUC               14         37.8
## 10 SXYL               22         59.5

# Exoenzymes genes 

exoenzymes_summary <- exoenzymes_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")

## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.

exoenzymes_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/22)*100)

## # A tibble: 10 × 3
##    wgs_species_3 n_genes prop_present
##    <chr>           <dbl>        <dbl>
##  1 MSC                 6         27.3
##  2 SAU                17         77.3
##  3 SCH                 6         27.3
##  4 SCO                 7         31.8
##  5 SDEV                4         18.2
##  6 SEQ                 4         18.2
##  7 SHAEM               4         18.2
##  8 SPXYL               7         31.8
##  9 SSUC                9         40.9
## 10 SXYL                8         36.4

# Immune evasion genes

im_ev_summary <- im_ev_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")

## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.

im_ev_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/35)*100)

## # A tibble: 10 × 3
##    wgs_species_3 n_genes prop_present
##    <chr>           <dbl>        <dbl>
##  1 MSC                 8         22.9
##  2 SAU                34         97.1
##  3 SCH                30         85.7
##  4 SCO                 8         22.9
##  5 SDEV               12         34.3
##  6 SEQ                15         42.9
##  7 SHAEM              18         51.4
##  8 SPXYL               4         11.4
##  9 SSUC                4         11.4
## 10 SXYL               28         80

# Iron related genes

iron_summary <- iron_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")

## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.

iron_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/29)*100)

## # A tibble: 10 × 3
##    wgs_species_3 n_genes prop_present
##    <chr>           <dbl>        <dbl>
##  1 MSC                17         58.6
##  2 SAU                29        100  
##  3 SCH                14         48.3
##  4 SCO                10         34.5
##  5 SDEV               12         41.4
##  6 SEQ                20         69.0
##  7 SHAEM              16         55.2
##  8 SPXYL              11         37.9
##  9 SSUC               11         37.9
## 10 SXYL               10         34.5

# Enterotoxins and exotoxins genes

enterotoxin_summary <- enterotoxin_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")

## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.

enterotoxin_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/87)*100)

## # A tibble: 2 × 3
##   wgs_species_3 n_genes prop_present
##   <chr>           <dbl>        <dbl>
## 1 SAU                59         67.8
## 2 SCH                36         41.4

# Toxin genes 

toxin_summary <- toxin_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")

## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.

toxin_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/36)*100)

## # A tibble: 10 × 3
##    wgs_species_3 n_genes prop_present
##    <chr>           <dbl>        <dbl>
##  1 MSC                 3         8.33
##  2 SAU                27        75   
##  3 SCH                10        27.8 
##  4 SCO                 8        22.2 
##  5 SDEV                7        19.4 
##  6 SEQ                 6        16.7 
##  7 SHAEM               8        22.2 
##  8 SPXYL               9        25   
##  9 SSUC                7        19.4 
## 10 SXYL                8        22.2

3.2 Visualizations

3.2.1 Adherence

# ---- Adherence genes ----

adherence_sau <- adherence_long %>% filter(grepl("SAU",wgs_isolate_id)) 

adherence_plot_1 <- adherence_sau %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(adherence_sau$wgs_isolate_id,adherence_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

adherence_sch <- adherence_long %>% filter(grepl("SCH",wgs_isolate_id)) 

adherence_plot_2 <- adherence_sch %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(adherence_sch$wgs_isolate_id,adherence_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


adherence_shaem <- adherence_long %>% filter(grepl("SHAEM",wgs_isolate_id)) 


adherence_plot_3 <- adherence_shaem %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(adherence_shaem$wgs_isolate_id,adherence_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




adherence_ssc <- adherence_long %>% filter(grepl("MSC",wgs_isolate_id)) 


adherence_plot_4 <- adherence_ssc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(adherence_ssc$wgs_isolate_id,adherence_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Mammaliicoccus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())





adherence_ssuc <- adherence_long %>% filter(grepl("SSUC",wgs_isolate_id)) 




adherence_plot_5 <- adherence_ssuc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(adherence_ssuc$wgs_isolate_id,adherence_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())



adherence_ssxyl <- adherence_long %>% filter(grepl("SXYL",wgs_isolate_id)) 

adherence_ssxyl$wgs_isolate_id <- fct_reorder(adherence_ssxyl$wgs_isolate_id,adherence_ssxyl$wgs_id)

adherence_spxyl <- adherence_long %>% filter(grepl("SPXYL",wgs_isolate_id)) 

levels(adherence_long$wgs_isolate_id)

## NULL

adherence_spxyl$wgs_isolate_id <- fct_reorder(adherence_spxyl$wgs_isolate_id,adherence_spxyl$wgs_id)
             

adherence_sxyl_spxyl <- rbind(adherence_ssxyl,adherence_spxyl)                                 
#adherence_ssxyl$wgs_isolate_id <- fct_reorder(adherence_ssxyl$wgs_isolate_id,adherence_ssxyl$wgs_id)

table(adherence_long$wgs_isolate_id)

## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      30      30      30      30      30      30      30      30      30      30 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      30      30      30      30      30      30      30      30      30      30 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      30      30      30      30      30      30      30      30      30      30 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      30      30      30      30      30      30      30      30      30      30 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      30      30      30      30      30      30      30      30      30      30 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      30      30      30      30      30      30      30      30      30      30 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      30      30      30      30      30      30      30      30      30      30 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      30      30      30      30      30      30      30      30      30      30 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      30      30      30      30

adherence_plot_6 <- adherence_sxyl_spxyl %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


adherence_sco <- adherence_long %>% filter(grepl("SCO",wgs_isolate_id)) 

adherence_sco$wgs_isolate_id <- fct_reorder(adherence_sco$wgs_isolate_id,adherence_sco$wgs_id)

adherence_sdev <- adherence_long %>% filter(grepl("SDEV",wgs_isolate_id)) 

levels(adherence_long$wgs_isolate_id)

## NULL

adherence_sdev$wgs_isolate_id <- fct_reorder(adherence_sdev$wgs_isolate_id,adherence_sdev$wgs_id)



adherence_seq <- adherence_long %>% filter(grepl("SEQ",wgs_isolate_id)) 

adherence_seq$wgs_isolate_id <- fct_reorder(adherence_seq$wgs_isolate_id,adherence_seq$wgs_id)


adherence_other <- rbind(adherence_sco,adherence_sdev,adherence_seq)                                 

table(adherence_long$wgs_isolate_id)

## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      30      30      30      30      30      30      30      30      30      30 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      30      30      30      30      30      30      30      30      30      30 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      30      30      30      30      30      30      30      30      30      30 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      30      30      30      30      30      30      30      30      30      30 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      30      30      30      30      30      30      30      30      30      30 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      30      30      30      30      30      30      30      30      30      30 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      30      30      30      30      30      30      30      30      30      30 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      30      30      30      30      30      30      30      30      30      30 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      30      30      30      30

adherence_plot_7 <- adherence_other %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Adherence",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),,axis.text.x = element_text(angle = 45, hjust = 1))




# Multipanel figure

adherence_plot <- ggarrange(adherence_plot_1 + rremove("xlab")+ rremove("ylab"),adherence_plot_2 + rremove("xlab")+ rremove("ylab"),adherence_plot_3 + rremove("xlab")+ rremove("ylab"),adherence_plot_4 + rremove("xlab")+ rremove("ylab"),adherence_plot_5 + rremove("xlab")+ rremove("ylab"),adherence_plot_6 + rremove("xlab")+ rremove("ylab"),adherence_plot_7 + rremove("xlab")+ rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2.25))

annotate_figure(adherence_plot,
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/adherence.png",width = 20, height = 40, units = "cm")
ggsave(plot = last_plot(),"./figures/adherence.pdf",width = 20, height = 40, units = "cm")

3.2.2 Exoenzymes

# ---- exoenzymes genes ----

exoenzymes_sau <- exoenzymes_long %>% filter(grepl("SAU",wgs_isolate_id)) 


exoenzymes_plot_1 <- exoenzymes_sau %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(exoenzymes_sau$wgs_isolate_id,exoenzymes_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




exoenzymes_sch <- exoenzymes_long %>% filter(grepl("SCH",wgs_isolate_id)) 


exoenzymes_plot_2 <- exoenzymes_sch %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(exoenzymes_sch$wgs_isolate_id,exoenzymes_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


exoenzymes_shaem <- exoenzymes_long %>% filter(grepl("SHAEM",wgs_isolate_id)) 


exoenzymes_plot_3 <- exoenzymes_shaem %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(exoenzymes_shaem$wgs_isolate_id,exoenzymes_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




exoenzymes_ssc <- exoenzymes_long %>% filter(grepl("MSC",wgs_isolate_id)) 


exoenzymes_plot_4 <- exoenzymes_ssc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(exoenzymes_ssc$wgs_isolate_id,exoenzymes_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Mammaliicoccus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())





exoenzymes_ssuc <- exoenzymes_long %>% filter(grepl("SSUC",wgs_isolate_id)) 




exoenzymes_plot_5 <- exoenzymes_ssuc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(exoenzymes_ssuc$wgs_isolate_id,exoenzymes_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())



exoenzymes_ssxyl <- exoenzymes_long %>% filter(grepl("SXYL",wgs_isolate_id)) 

exoenzymes_ssxyl$wgs_isolate_id <- fct_reorder(exoenzymes_ssxyl$wgs_isolate_id,exoenzymes_ssxyl$wgs_id)

exoenzymes_spxyl <- exoenzymes_long %>% filter(grepl("SPXYL",wgs_isolate_id)) 

levels(exoenzymes_long$wgs_isolate_id)

## NULL

exoenzymes_spxyl$wgs_isolate_id <- fct_reorder(exoenzymes_spxyl$wgs_isolate_id,exoenzymes_spxyl$wgs_id)


exoenzymes_sxyl_spxyl <- rbind(exoenzymes_ssxyl,exoenzymes_spxyl)                                 
#exoenzymes_ssxyl$wgs_isolate_id <- fct_reorder(exoenzymes_ssxyl$wgs_isolate_id,exoenzymes_ssxyl$wgs_id)

table(exoenzymes_long$wgs_isolate_id)

## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      18      18      18      18      18      18      18      18      18      18 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      18      18      18      18      18      18      18      18      18      18 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      18      18      18      18      18      18      18      18      18      18 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      18      18      18      18      18      18      18      18      18      18 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      18      18      18      18      18      18      18      18      18      18 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      18      18      18      18      18      18      18      18      18      18 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      18      18      18      18      18      18      18      18      18      18 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      18      18      18      18      18      18      18      18      18      18 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      18      18      18      18

exoenzymes_plot_6 <- exoenzymes_sxyl_spxyl %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


exoenzymes_sco <- exoenzymes_long %>% filter(grepl("SCO",wgs_isolate_id)) 

exoenzymes_sco$wgs_isolate_id <- fct_reorder(exoenzymes_sco$wgs_isolate_id,exoenzymes_sco$wgs_id)

exoenzymes_sdev <- exoenzymes_long %>% filter(grepl("SDEV",wgs_isolate_id)) 

levels(exoenzymes_long$wgs_isolate_id)

## NULL

exoenzymes_sdev$wgs_isolate_id <- fct_reorder(exoenzymes_sdev$wgs_isolate_id,exoenzymes_sdev$wgs_id)



exoenzymes_seq <- exoenzymes_long %>% filter(grepl("SEQ",wgs_isolate_id)) 

exoenzymes_seq$wgs_isolate_id <- fct_reorder(exoenzymes_seq$wgs_isolate_id,exoenzymes_seq$wgs_id)


exoenzymes_other <- rbind(exoenzymes_sco,exoenzymes_sdev,exoenzymes_seq)                                 

table(exoenzymes_long$wgs_isolate_id)

## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      18      18      18      18      18      18      18      18      18      18 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      18      18      18      18      18      18      18      18      18      18 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      18      18      18      18      18      18      18      18      18      18 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      18      18      18      18      18      18      18      18      18      18 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      18      18      18      18      18      18      18      18      18      18 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      18      18      18      18      18      18      18      18      18      18 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      18      18      18      18      18      18      18      18      18      18 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      18      18      18      18      18      18      18      18      18      18 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      18      18      18      18

exoenzymes_plot_7 <- exoenzymes_other %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Exoenzymes",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x = element_text(angle = 45, hjust = 1))




# Multipanel figure

exoenzymes_plot <- ggarrange(exoenzymes_plot_1 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_2 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_3 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_4 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_5 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_6 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_7 + rremove("xlab")+ rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2.25))

annotate_figure(exoenzymes_plot,
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/exoenzymes.pdf",width = 20, height = 40, units = "cm")
ggsave(plot = last_plot(),"./figures/exoenzymes.png",width = 20, height = 40, units = "cm")




# plot adherence + exoenzymes

adherence_exoenzymes_plot <- ggarrange(
  adherence_plot_1 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_1 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_2 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_2 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_3 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_3 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_4 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_4 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_5 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_5 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_6 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_6 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_7 + rremove("ylab"),
  exoenzymes_plot_7 + rremove("ylab") ,
  ncol = 2, nrow = 7, common.legend = TRUE, align = "v",
  labels = c("A1", "A2", "B1", "B2", "C1", "C2", "D1", "D2", "E1", "E2", "F1", "F2", "G1", "G2"),
  heights = c(1.25,4.5, 4.25, 1.75,3,3.5, 2.25)
)

annotate_figure(adherence_exoenzymes_plot,
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/figure_8.pdf",width = 40, height = 40, units = "cm",dpi=600)
ggsave(plot = last_plot(),"./figures/figure_8.png",width = 40, height = 40, units = "cm",dpi=600)
ggsave(plot = last_plot(),"./figures/figure_8.jpeg",width = 40, height = 40, units = "cm",dpi=600)

3.2.3 Immune evasion

# ---- im_ev genes ----

im_ev_sau <- im_ev_long %>% filter(grepl("SAU",wgs_isolate_id)) 


im_ev_plot_1 <- im_ev_sau %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(im_ev_sau$wgs_isolate_id,im_ev_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




im_ev_sch <- im_ev_long %>% filter(grepl("SCH",wgs_isolate_id)) 


im_ev_plot_2 <- im_ev_sch %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(im_ev_sch$wgs_isolate_id,im_ev_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


im_ev_shaem <- im_ev_long %>% filter(grepl("SHAEM",wgs_isolate_id)) 


im_ev_plot_3 <- im_ev_shaem %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(im_ev_shaem$wgs_isolate_id,im_ev_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




im_ev_ssc <- im_ev_long %>% filter(grepl("MSC",wgs_isolate_id)) 


im_ev_plot_4 <- im_ev_ssc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(im_ev_ssc$wgs_isolate_id,im_ev_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Mammaliicoccus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())





im_ev_ssuc <- im_ev_long %>% filter(grepl("SSUC",wgs_isolate_id)) 




im_ev_plot_5 <- im_ev_ssuc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(im_ev_ssuc$wgs_isolate_id,im_ev_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())



im_ev_ssxyl <- im_ev_long %>% filter(grepl("SXYL",wgs_isolate_id)) 

im_ev_ssxyl$wgs_isolate_id <- fct_reorder(im_ev_ssxyl$wgs_isolate_id,im_ev_ssxyl$wgs_id)

im_ev_spxyl <- im_ev_long %>% filter(grepl("SPXYL",wgs_isolate_id)) 

levels(im_ev_long$wgs_isolate_id)

## NULL

im_ev_spxyl$wgs_isolate_id <- fct_reorder(im_ev_spxyl$wgs_isolate_id,im_ev_spxyl$wgs_id)


im_ev_sxyl_spxyl <- rbind(im_ev_ssxyl,im_ev_spxyl)                                 
#im_ev_ssxyl$wgs_isolate_id <- fct_reorder(im_ev_ssxyl$wgs_isolate_id,im_ev_ssxyl$wgs_id)

table(im_ev_long$wgs_isolate_id)

## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      34      34      34      34      34      34      34      34      34      34 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      34      34      34      34      34      34      34      34      34      34 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      34      34      34      34      34      34      34      34      34      34 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      34      34      34      34      34      34      34      34      34      34 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      34      34      34      34      34      34      34      34      34      34 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      34      34      34      34      34      34      34      34      34      34 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      34      34      34      34      34      34      34      34      34      34 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      34      34      34      34      34      34      34      34      34      34 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      34      34      34      34

im_ev_plot_6 <- im_ev_sxyl_spxyl %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


im_ev_sco <- im_ev_long %>% filter(grepl("SCO",wgs_isolate_id)) 

im_ev_sco$wgs_isolate_id <- fct_reorder(im_ev_sco$wgs_isolate_id,im_ev_sco$wgs_id)

im_ev_sdev <- im_ev_long %>% filter(grepl("SDEV",wgs_isolate_id)) 

levels(im_ev_long$wgs_isolate_id)

## NULL

im_ev_sdev$wgs_isolate_id <- fct_reorder(im_ev_sdev$wgs_isolate_id,im_ev_sdev$wgs_id)



im_ev_seq <- im_ev_long %>% filter(grepl("SEQ",wgs_isolate_id)) 

im_ev_seq$wgs_isolate_id <- fct_reorder(im_ev_seq$wgs_isolate_id,im_ev_seq$wgs_id)


im_ev_other <- rbind(im_ev_sco,im_ev_sdev,im_ev_seq)                                 

table(im_ev_long$wgs_isolate_id)

## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      34      34      34      34      34      34      34      34      34      34 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      34      34      34      34      34      34      34      34      34      34 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      34      34      34      34      34      34      34      34      34      34 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      34      34      34      34      34      34      34      34      34      34 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      34      34      34      34      34      34      34      34      34      34 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      34      34      34      34      34      34      34      34      34      34 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      34      34      34      34      34      34      34      34      34      34 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      34      34      34      34      34      34      34      34      34      34 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      34      34      34      34

im_ev_plot_7 <- im_ev_other %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Immune evasion",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x = element_text(angle = 45, hjust = 1))




# Multipanel figure

im_ev_plot <- ggarrange(im_ev_plot_1 + rremove("xlab")+ rremove("ylab"),im_ev_plot_2 + rremove("xlab")+ rremove("ylab"),im_ev_plot_3 + rremove("xlab")+ rremove("ylab"),im_ev_plot_4 + rremove("xlab")+ rremove("ylab"),im_ev_plot_5 + rremove("xlab")+ rremove("ylab"),im_ev_plot_6 + rremove("xlab")+ rremove("ylab"),im_ev_plot_7 + rremove("xlab")+ rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2.25))

annotate_figure(im_ev_plot,
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/im_ev.pdf",width = 20, height = 40, units = "cm")
ggsave(plot = last_plot(),"./figures/im_ev.png",width = 20, height = 40, units = "cm")

3.2.4 Iron related

# ---- iron genes ----

iron_sau <- iron_long %>% filter(grepl("SAU",wgs_isolate_id)) 

#dummy_row <- data.frame(presence_gene = "No", vir_gene_nonredundant = NA)
#iron_sau <- rbind(iron_sau, dummy_row)

iron_sau$presence_gene <- factor(iron_sau$presence_gene,levels=c("No","Yes"))

fill_colors <- c("Yes" = "#009999", "No" = "white")
                                 
iron_plot_1 <- iron_sau %>% 
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(iron_sau$wgs_isolate_id,iron_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(values = fill_colors,drop=FALSE) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




iron_sch <- iron_long %>% filter(grepl("SCH",wgs_isolate_id)) 


iron_plot_2 <- iron_sch %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(iron_sch$wgs_isolate_id,iron_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


iron_shaem <- iron_long %>% filter(grepl("SHAEM",wgs_isolate_id)) 


iron_plot_3 <- iron_shaem %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(iron_shaem$wgs_isolate_id,iron_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




iron_ssc <- iron_long %>% filter(grepl("MSC",wgs_isolate_id)) 


iron_plot_4 <- iron_ssc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(iron_ssc$wgs_isolate_id,iron_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Mammaliicoccus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())





iron_ssuc <- iron_long %>% filter(grepl("SSUC",wgs_isolate_id)) 




iron_plot_5 <- iron_ssuc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(iron_ssuc$wgs_isolate_id,iron_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())



iron_ssxyl <- iron_long %>% filter(grepl("SXYL",wgs_isolate_id)) 

iron_ssxyl$wgs_isolate_id <- fct_reorder(iron_ssxyl$wgs_isolate_id,iron_ssxyl$wgs_id)

iron_spxyl <- iron_long %>% filter(grepl("SPXYL",wgs_isolate_id)) 

levels(iron_long$wgs_isolate_id)

## NULL

iron_spxyl$wgs_isolate_id <- fct_reorder(iron_spxyl$wgs_isolate_id,iron_spxyl$wgs_id)


iron_sxyl_spxyl <- rbind(iron_ssxyl,iron_spxyl)                                 
#iron_ssxyl$wgs_isolate_id <- fct_reorder(iron_ssxyl$wgs_isolate_id,iron_ssxyl$wgs_id)

table(iron_long$wgs_isolate_id)

## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      29      29      29      29      29      29      29      29      29      29 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      29      29      29      29      29      29      29      29      29      29 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      29      29      29      29      29      29      29      29      29      29 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      29      29      29      29      29      29      29      29      29      29 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      29      29      29      29      29      29      29      29      29      29 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      29      29      29      29      29      29      29      29      29      29 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      29      29      29      29      29      29      29      29      29      29 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      29      29      29      29      29      29      29      29      29      29 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      29      29      29      29

iron_plot_6 <- iron_sxyl_spxyl %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


iron_sco <- iron_long %>% filter(grepl("SCO",wgs_isolate_id)) 

iron_sco$wgs_isolate_id <- fct_reorder(iron_sco$wgs_isolate_id,iron_sco$wgs_id)

iron_sdev <- iron_long %>% filter(grepl("SDEV",wgs_isolate_id)) 

levels(iron_long$wgs_isolate_id)

## NULL

iron_sdev$wgs_isolate_id <- fct_reorder(iron_sdev$wgs_isolate_id,iron_sdev$wgs_id)



iron_seq <- iron_long %>% filter(grepl("SEQ",wgs_isolate_id)) 

iron_seq$wgs_isolate_id <- fct_reorder(iron_seq$wgs_isolate_id,iron_seq$wgs_id)


iron_other <- rbind(iron_sco,iron_sdev,iron_seq)                                 

table(iron_long$wgs_isolate_id)

## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      29      29      29      29      29      29      29      29      29      29 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      29      29      29      29      29      29      29      29      29      29 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      29      29      29      29      29      29      29      29      29      29 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      29      29      29      29      29      29      29      29      29      29 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      29      29      29      29      29      29      29      29      29      29 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      29      29      29      29      29      29      29      29      29      29 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      29      29      29      29      29      29      29      29      29      29 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      29      29      29      29      29      29      29      29      29      29 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      29      29      29      29

iron_plot_7 <- iron_other %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Iron uptake and metabolism",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x = element_text(angle = 45, hjust = 1))




# Multipanel figure

iron_plot <- ggarrange(iron_plot_1 + rremove("xlab")+ rremove("ylab"),iron_plot_2 + rremove("xlab")+ rremove("ylab"),iron_plot_3 + rremove("xlab")+ rremove("ylab"),iron_plot_4 + rremove("xlab")+ rremove("ylab"),iron_plot_5 + rremove("xlab")+ rremove("ylab"),iron_plot_6 + rremove("xlab")+ rremove("ylab"),iron_plot_7 + rremove("xlab")+ rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2.25))

annotate_figure(iron_plot,
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/iron.pdf",width = 20, height = 40, units = "cm")

# Plot with immune evasion + iron genes 

im_ev_iron_plot <- ggarrange(
  im_ev_plot_1 + rremove("xlab") + rremove("ylab"),
  iron_plot_1 + rremove("xlab") + rremove("ylab") ,
  im_ev_plot_2 + rremove("xlab") + rremove("ylab"),
  iron_plot_2 + rremove("xlab") + rremove("ylab") ,
  im_ev_plot_3 + rremove("xlab") + rremove("ylab"),
  iron_plot_3 + rremove("xlab") + rremove("ylab") ,
  im_ev_plot_4 + rremove("xlab") + rremove("ylab"),
  iron_plot_4 + rremove("xlab") + rremove("ylab") ,
  im_ev_plot_5 + rremove("xlab") + rremove("ylab"),
  iron_plot_5 + rremove("xlab") + rremove("ylab") ,
  im_ev_plot_6 + rremove("xlab") + rremove("ylab"),
  iron_plot_6 + rremove("xlab") + rremove("ylab") ,
  im_ev_plot_7 + rremove("ylab"),
  iron_plot_7 + rremove("ylab") ,
  ncol = 2, nrow = 7, common.legend = TRUE, align = "v",
  labels = c("A1", "A2", "B1", "B2", "C1", "C2", "D1", "D2", "E1", "E2", "F1", "F2", "G1", "G2"),
  heights = c(1.25,4.5, 4.25, 1.75,3,3.5, 2.25)
)

annotate_figure(im_ev_iron_plot,
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/figure_9.pdf",width = 40, height = 40, units = "cm",dpi=600)
ggsave(plot = last_plot(),"./figures/figure_9.png",width = 40, height = 40, units = "cm",dpi=600)
ggsave(plot = last_plot(),"./figures/figure_9.jpeg",width = 40, height = 40, units = "cm",dpi=600)

3.2.5 Enterotoxins and exotoxins

# ---- enterotoxin genes ----

enterotoxin_sau <- enterotoxin_long %>% filter(grepl("SAU",wgs_isolate_id)) 


enterotoxin_plot_1 <- enterotoxin_sau %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(enterotoxin_sau$wgs_isolate_id,enterotoxin_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

enterotoxin_sch <- enterotoxin_long %>% filter(grepl("SCH",wgs_isolate_id)) 


enterotoxin_plot_2 <- enterotoxin_sch %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(enterotoxin_sch$wgs_isolate_id,enterotoxin_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


enterotoxin_shaem <- enterotoxin_long %>% filter(grepl("SHAEM",wgs_isolate_id)) 


enterotoxin_plot_3 <- enterotoxin_shaem %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(enterotoxin_shaem$wgs_isolate_id,enterotoxin_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

enterotoxin_ssc <- enterotoxin_long %>% filter(grepl("MSC",wgs_isolate_id)) 

enterotoxin_plot_4 <- enterotoxin_ssc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(enterotoxin_ssc$wgs_isolate_id,enterotoxin_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Mammaliicoccus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

enterotoxin_ssuc <- enterotoxin_long %>% filter(grepl("SSUC",wgs_isolate_id)) 

enterotoxin_plot_5 <- enterotoxin_ssuc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(enterotoxin_ssuc$wgs_isolate_id,enterotoxin_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

enterotoxin_ssxyl <- enterotoxin_long %>% filter(grepl("SXYL",wgs_isolate_id)) 

enterotoxin_ssxyl$wgs_isolate_id <- fct_reorder(enterotoxin_ssxyl$wgs_isolate_id,enterotoxin_ssxyl$wgs_id)

enterotoxin_spxyl <- enterotoxin_long %>% filter(grepl("SPXYL",wgs_isolate_id)) 

levels(enterotoxin_long$wgs_isolate_id)

## NULL

enterotoxin_spxyl$wgs_isolate_id <- fct_reorder(enterotoxin_spxyl$wgs_isolate_id,enterotoxin_spxyl$wgs_id)


enterotoxin_sxyl_spxyl <- rbind(enterotoxin_ssxyl,enterotoxin_spxyl)                                 
#enterotoxin_ssxyl$wgs_isolate_id <- fct_reorder(enterotoxin_ssxyl$wgs_isolate_id,enterotoxin_ssxyl$wgs_id)

table(enterotoxin_long$wgs_isolate_id)

## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      59      59      59      59      59      59      59      59      59      59 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      59      59      59      59      59      59      59      59      59      59 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      59      59      59      59      59      59      59      59      59      59 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      59      59      59      59      59      59      59      59      59      59 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      59      59      59      59      59      59      59      59      59      59 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      59      59      59      59      59      59      59      59      59      59 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      59      59      59      59      59      59      59      59      59      59 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      59      59      59      59      59      59      59      59      59      59 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      59      59      59      59

enterotoxin_plot_6 <- enterotoxin_sxyl_spxyl %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


enterotoxin_sco <- enterotoxin_long %>% filter(grepl("SCO",wgs_isolate_id)) 

enterotoxin_sco$wgs_isolate_id <- fct_reorder(enterotoxin_sco$wgs_isolate_id,enterotoxin_sco$wgs_id)

enterotoxin_sdev <- enterotoxin_long %>% filter(grepl("SDEV",wgs_isolate_id)) 

levels(enterotoxin_long$wgs_isolate_id)

## NULL

enterotoxin_sdev$wgs_isolate_id <- fct_reorder(enterotoxin_sdev$wgs_isolate_id,enterotoxin_sdev$wgs_id)



enterotoxin_seq <- enterotoxin_long %>% filter(grepl("SEQ",wgs_isolate_id)) 

enterotoxin_seq$wgs_isolate_id <- fct_reorder(enterotoxin_seq$wgs_isolate_id,enterotoxin_seq$wgs_id)


enterotoxin_other <- rbind(enterotoxin_sco,enterotoxin_sdev,enterotoxin_seq)                                 

table(enterotoxin_long$wgs_isolate_id)

## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      59      59      59      59      59      59      59      59      59      59 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      59      59      59      59      59      59      59      59      59      59 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      59      59      59      59      59      59      59      59      59      59 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      59      59      59      59      59      59      59      59      59      59 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      59      59      59      59      59      59      59      59      59      59 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      59      59      59      59      59      59      59      59      59      59 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      59      59      59      59      59      59      59      59      59      59 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      59      59      59      59      59      59      59      59      59      59 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      59      59      59      59

enterotoxin_plot_7 <- enterotoxin_other %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Enterotoxins & exotoxins",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x = element_text(angle = 45, hjust = 1))




# Multipanel figure

enterotoxin_plot <- ggarrange(enterotoxin_plot_1 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_2 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_3 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_4 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_5 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_6 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_7 + rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2.25))

annotate_figure(enterotoxin_plot,
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/enterotoxin.pdf",width = 50, height = 40, units = "cm")
ggsave(plot = last_plot(),"./figures/enterotoxin.png",width = 50, height = 40, units = "cm")

3.2.6 Other toxins

# ---- toxin genes ----


toxin_sau <- toxin_long %>% filter(grepl("SAU",wgs_isolate_id)) 


toxin_plot_1 <- toxin_sau %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(toxin_sau$wgs_isolate_id,toxin_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




toxin_sch <- toxin_long %>% filter(grepl("SCH",wgs_isolate_id)) 


toxin_plot_2 <- toxin_sch %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(toxin_sch$wgs_isolate_id,toxin_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


toxin_shaem <- toxin_long %>% filter(grepl("SHAEM",wgs_isolate_id)) 


toxin_plot_3 <- toxin_shaem %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(toxin_shaem$wgs_isolate_id,toxin_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




toxin_ssc <- toxin_long %>% filter(grepl("MSC",wgs_isolate_id)) 


toxin_plot_4 <- toxin_ssc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(toxin_ssc$wgs_isolate_id,toxin_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Mammaliicoccus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())





toxin_ssuc <- toxin_long %>% filter(grepl("SSUC",wgs_isolate_id)) 




toxin_plot_5 <- toxin_ssuc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(toxin_ssuc$wgs_isolate_id,toxin_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())



toxin_ssxyl <- toxin_long %>% filter(grepl("SXYL",wgs_isolate_id)) 

toxin_ssxyl$wgs_isolate_id <- fct_reorder(toxin_ssxyl$wgs_isolate_id,toxin_ssxyl$wgs_id)

toxin_spxyl <- toxin_long %>% filter(grepl("SPXYL",wgs_isolate_id)) 

levels(toxin_long$wgs_isolate_id)

## NULL

toxin_spxyl$wgs_isolate_id <- fct_reorder(toxin_spxyl$wgs_isolate_id,toxin_spxyl$wgs_id)


toxin_sxyl_spxyl <- rbind(toxin_ssxyl,toxin_spxyl)                                 
#toxin_ssxyl$wgs_isolate_id <- fct_reorder(toxin_ssxyl$wgs_isolate_id,toxin_ssxyl$wgs_id)

table(toxin_long$wgs_isolate_id)

## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      30      30      30      30      30      30      30      30      30      30 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      30      30      30      30      30      30      30      30      30      30 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      30      30      30      30      30      30      30      30      30      30 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      30      30      30      30      30      30      30      30      30      30 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      30      30      30      30      30      30      30      30      30      30 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      30      30      30      30      30      30      30      30      30      30 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      30      30      30      30      30      30      30      30      30      30 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      30      30      30      30      30      30      30      30      30      30 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      30      30      30      30

toxin_plot_6 <- toxin_sxyl_spxyl %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


toxin_sco <- toxin_long %>% filter(grepl("SCO",wgs_isolate_id)) 

toxin_sco$wgs_isolate_id <- fct_reorder(toxin_sco$wgs_isolate_id,toxin_sco$wgs_id)

toxin_sdev <- toxin_long %>% filter(grepl("SDEV",wgs_isolate_id)) 

levels(toxin_long$wgs_isolate_id)

## NULL

toxin_sdev$wgs_isolate_id <- fct_reorder(toxin_sdev$wgs_isolate_id,toxin_sdev$wgs_id)



toxin_seq <- toxin_long %>% filter(grepl("SEQ",wgs_isolate_id)) 

toxin_seq$wgs_isolate_id <- fct_reorder(toxin_seq$wgs_isolate_id,toxin_seq$wgs_id)


toxin_other <- rbind(toxin_sco,toxin_sdev,toxin_seq)                                 

table(toxin_long$wgs_isolate_id)

## 
##    MSC1    MSC2    MSC3    MSC4    MSC5    MSC6    SAU1    SAU2    SAU3    SCH1 
##      30      30      30      30      30      30      30      30      30      30 
##   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15   SCH16   SCH17   SCH18   SCH19 
##      30      30      30      30      30      30      30      30      30      30 
##    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5    SCH6    SCH7    SCH8    SCH9 
##      30      30      30      30      30      30      30      30      30      30 
##    SCO1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 SHAEM11 SHAEM12 SHAEM13 SHAEM14 
##      30      30      30      30      30      30      30      30      30      30 
## SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2  SHAEM3  SHAEM4  SHAEM5  SHAEM6 
##      30      30      30      30      30      30      30      30      30      30 
##  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3  SPXYL4  SPXYL5   SSUC1  SSUC10 
##      30      30      30      30      30      30      30      30      30      30 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      30      30      30      30      30      30      30      30      30      30 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      30      30      30      30      30      30      30      30      30      30 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      30      30      30      30

toxin_plot_7 <- toxin_other %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Other toxins",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x = element_text(angle = 45, hjust = 1))




# Multipanel figure

toxin_plot <- ggarrange(toxin_plot_1 + rremove("xlab")+ rremove("ylab"),toxin_plot_2 + rremove("xlab")+ rremove("ylab"),toxin_plot_3 + rremove("xlab")+ rremove("ylab"),toxin_plot_4 + rremove("xlab")+ rremove("ylab"),toxin_plot_5 + rremove("xlab")+ rremove("ylab"),toxin_plot_6 + rremove("xlab")+ rremove("ylab"),toxin_plot_7 + rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2.25))

annotate_figure(toxin_plot,
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/toxin.pdf",width = 20, height = 40, units = "cm")
ggsave(plot = last_plot(),"./figures/toxin.png",width = 20, height = 40, units = "cm")


enterotoxin_toxin_plot <- ggarrange(
  enterotoxin_plot_1 + rremove("xlab") + rremove("ylab"),
  toxin_plot_1 + rremove("xlab") + rremove("ylab") ,
  enterotoxin_plot_2 + rremove("xlab") + rremove("ylab"),
  toxin_plot_2 + rremove("xlab") + rremove("ylab") ,
  enterotoxin_plot_3 + rremove("xlab") + rremove("ylab"),
  toxin_plot_3 + rremove("xlab") + rremove("ylab") ,
  enterotoxin_plot_4 + rremove("xlab") + rremove("ylab"),
  toxin_plot_4 + rremove("xlab") + rremove("ylab") ,
  enterotoxin_plot_5 + rremove("xlab") + rremove("ylab"),
  toxin_plot_5 + rremove("xlab") + rremove("ylab") ,
  enterotoxin_plot_6 + rremove("xlab") + rremove("ylab"),
  toxin_plot_6 + rremove("xlab") + rremove("ylab") ,
  enterotoxin_plot_7 + rremove("ylab"),
  toxin_plot_7 + rremove("ylab") ,
  ncol = 2, nrow = 7, common.legend = TRUE, align = "v",
  labels = c("A1", "A2", "B1", "B2", "C1", "C2", "D1", "D2", "E1", "E2", "F1", "F2", "G1", "G2"),
  heights = c(1.25,4.5, 4.25, 1.75,3,3.5, 2.25))

annotate_figure(enterotoxin_toxin_plot,
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/figure_10.pdf",width = 50, height = 40, units = "cm",dpi=600)
ggsave(plot = last_plot(),"./figures/figure_10.png",width = 50, height = 40, units = "cm",dpi=600)
ggsave(plot = last_plot(),"./figures/figure_10.jpeg",width = 50, height = 40, units = "cm",dpi=600)

4 AMR genes and plasmids

4.1 Summary

# Presence of AMR related genes

amr_long$sau <- ifelse(amr_long$wgs_species_3 == "SAU", 1, 0)

amr_long <- amr_long %>% filter(wgs_species_3!="SUB")

amr_long_summary <- amr_long %>% group_by(gene_abbreviation,wgs_species_3) %>% summarise(n=max(presence)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")

## `summarise()` has grouped output by 'gene_abbreviation'. You can override using
## the `.groups` argument.

amr_long_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n))

## # A tibble: 10 × 2
##    wgs_species_3 n_genes
##    <chr>           <dbl>
##  1 MSC                 4
##  2 SAU                15
##  3 SCH                 1
##  4 SCO                 6
##  5 SDEV                3
##  6 SEQ                 2
##  7 SHAEM               4
##  8 SPXYL               6
##  9 SSUC                1
## 10 SXYL                4

table1(~presence_gene|gene_abbreviation,data=amr_long)

## Warning in .table1.internal(x = x, labels = labels, groupspan = groupspan, :
## Table has 29 columns. Are you sure this is what you want?

	AAC3 (N=83)	ANT9 (N=83)	APH3-PRIME (N=83)	ARLR (N=83)	ARLS (N=83)	BLAZ (N=83)	DHAP (N=83)	ERM (N=83)	FOSB (N=83)	FOSD (N=83)	FUSF (N=83)	LMRS (N=83)	LNUA (N=83)	MECA (N=83)	MECC (N=83)	MECI (N=83)	MEPA (N=83)	MEPB (N=83)	MEPR (N=83)	MGRA (N=83)	MPHC (N=83)	MSRA (N=83)	NORA (N=83)	NORB (N=83)	RLMH (N=83)	SALA (N=83)	TET38 (N=83)	TETK (N=83)	Overall (N=2324)
presence_gene
No	80 (96.4%)	82 (98.8%)	41 (49.4%)	80 (96.4%)	80 (96.4%)	78 (94.0%)	80 (96.4%)	78 (94.0%)	81 (97.6%)	81 (97.6%)	82 (98.8%)	80 (96.4%)	77 (92.8%)	77 (92.8%)	78 (94.0%)	78 (94.0%)	80 (96.4%)	80 (96.4%)	80 (96.4%)	59 (71.1%)	69 (83.1%)	82 (98.8%)	80 (96.4%)	80 (96.4%)	39 (47.0%)	77 (92.8%)	80 (96.4%)	82 (98.8%)	2121 (91.3%)
Yes	3 (3.6%)	1 (1.2%)	42 (50.6%)	3 (3.6%)	3 (3.6%)	5 (6.0%)	3 (3.6%)	5 (6.0%)	2 (2.4%)	2 (2.4%)	1 (1.2%)	3 (3.6%)	6 (7.2%)	6 (7.2%)	5 (6.0%)	5 (6.0%)	3 (3.6%)	3 (3.6%)	3 (3.6%)	24 (28.9%)	14 (16.9%)	1 (1.2%)	3 (3.6%)	3 (3.6%)	44 (53.0%)	6 (7.2%)	3 (3.6%)	1 (1.2%)	203 (8.7%)

table1(~presence_gene|wgs_species_3:gene_abbreviation,data=amr_long)

## Warning in .table1.internal(x = x, labels = labels, groupspan = groupspan, :
## Table has 308 columns. Are you sure this is what you want?

	MSC																												SAU																												SCH																												SCO																												SDEV																												SEQ																												SHAEM																												SPXYL																												SSUC																												SXYL																												Overall
	AAC3 (N=6)	ANT9 (N=6)	APH3-PRIME (N=6)	ARLR (N=6)	ARLS (N=6)	BLAZ (N=6)	DHAP (N=6)	ERM (N=6)	FOSB (N=6)	FOSD (N=6)	FUSF (N=6)	LMRS (N=6)	LNUA (N=6)	MECA (N=6)	MECC (N=6)	MECI (N=6)	MEPA (N=6)	MEPB (N=6)	MEPR (N=6)	MGRA (N=6)	MPHC (N=6)	MSRA (N=6)	NORA (N=6)	NORB (N=6)	RLMH (N=6)	SALA (N=6)	TET38 (N=6)	TETK (N=6)	AAC3 (N=3)	ANT9 (N=3)	APH3-PRIME (N=3)	ARLR (N=3)	ARLS (N=3)	BLAZ (N=3)	DHAP (N=3)	ERM (N=3)	FOSB (N=3)	FOSD (N=3)	FUSF (N=3)	LMRS (N=3)	LNUA (N=3)	MECA (N=3)	MECC (N=3)	MECI (N=3)	MEPA (N=3)	MEPB (N=3)	MEPR (N=3)	MGRA (N=3)	MPHC (N=3)	MSRA (N=3)	NORA (N=3)	NORB (N=3)	RLMH (N=3)	SALA (N=3)	TET38 (N=3)	TETK (N=3)	AAC3 (N=21)	ANT9 (N=21)	APH3-PRIME (N=21)	ARLR (N=21)	ARLS (N=21)	BLAZ (N=21)	DHAP (N=21)	ERM (N=21)	FOSB (N=21)	FOSD (N=21)	FUSF (N=21)	LMRS (N=21)	LNUA (N=21)	MECA (N=21)	MECC (N=21)	MECI (N=21)	MEPA (N=21)	MEPB (N=21)	MEPR (N=21)	MGRA (N=21)	MPHC (N=21)	MSRA (N=21)	NORA (N=21)	NORB (N=21)	RLMH (N=21)	SALA (N=21)	TET38 (N=21)	TETK (N=21)	AAC3 (N=1)	ANT9 (N=1)	APH3-PRIME (N=1)	ARLR (N=1)	ARLS (N=1)	BLAZ (N=1)	DHAP (N=1)	ERM (N=1)	FOSB (N=1)	FOSD (N=1)	FUSF (N=1)	LMRS (N=1)	LNUA (N=1)	MECA (N=1)	MECC (N=1)	MECI (N=1)	MEPA (N=1)	MEPB (N=1)	MEPR (N=1)	MGRA (N=1)	MPHC (N=1)	MSRA (N=1)	NORA (N=1)	NORB (N=1)	RLMH (N=1)	SALA (N=1)	TET38 (N=1)	TETK (N=1)	AAC3 (N=2)	ANT9 (N=2)	APH3-PRIME (N=2)	ARLR (N=2)	ARLS (N=2)	BLAZ (N=2)	DHAP (N=2)	ERM (N=2)	FOSB (N=2)	FOSD (N=2)	FUSF (N=2)	LMRS (N=2)	LNUA (N=2)	MECA (N=2)	MECC (N=2)	MECI (N=2)	MEPA (N=2)	MEPB (N=2)	MEPR (N=2)	MGRA (N=2)	MPHC (N=2)	MSRA (N=2)	NORA (N=2)	NORB (N=2)	RLMH (N=2)	SALA (N=2)	TET38 (N=2)	TETK (N=2)	AAC3 (N=1)	ANT9 (N=1)	APH3-PRIME (N=1)	ARLR (N=1)	ARLS (N=1)	BLAZ (N=1)	DHAP (N=1)	ERM (N=1)	FOSB (N=1)	FOSD (N=1)	FUSF (N=1)	LMRS (N=1)	LNUA (N=1)	MECA (N=1)	MECC (N=1)	MECI (N=1)	MEPA (N=1)	MEPB (N=1)	MEPR (N=1)	MGRA (N=1)	MPHC (N=1)	MSRA (N=1)	NORA (N=1)	NORB (N=1)	RLMH (N=1)	SALA (N=1)	TET38 (N=1)	TETK (N=1)	AAC3 (N=19)	ANT9 (N=19)	APH3-PRIME (N=19)	ARLR (N=19)	ARLS (N=19)	BLAZ (N=19)	DHAP (N=19)	ERM (N=19)	FOSB (N=19)	FOSD (N=19)	FUSF (N=19)	LMRS (N=19)	LNUA (N=19)	MECA (N=19)	MECC (N=19)	MECI (N=19)	MEPA (N=19)	MEPB (N=19)	MEPR (N=19)	MGRA (N=19)	MPHC (N=19)	MSRA (N=19)	NORA (N=19)	NORB (N=19)	RLMH (N=19)	SALA (N=19)	TET38 (N=19)	TETK (N=19)	AAC3 (N=5)	ANT9 (N=5)	APH3-PRIME (N=5)	ARLR (N=5)	ARLS (N=5)	BLAZ (N=5)	DHAP (N=5)	ERM (N=5)	FOSB (N=5)	FOSD (N=5)	FUSF (N=5)	LMRS (N=5)	LNUA (N=5)	MECA (N=5)	MECC (N=5)	MECI (N=5)	MEPA (N=5)	MEPB (N=5)	MEPR (N=5)	MGRA (N=5)	MPHC (N=5)	MSRA (N=5)	NORA (N=5)	NORB (N=5)	RLMH (N=5)	SALA (N=5)	TET38 (N=5)	TETK (N=5)	AAC3 (N=12)	ANT9 (N=12)	APH3-PRIME (N=12)	ARLR (N=12)	ARLS (N=12)	BLAZ (N=12)	DHAP (N=12)	ERM (N=12)	FOSB (N=12)	FOSD (N=12)	FUSF (N=12)	LMRS (N=12)	LNUA (N=12)	MECA (N=12)	MECC (N=12)	MECI (N=12)	MEPA (N=12)	MEPB (N=12)	MEPR (N=12)	MGRA (N=12)	MPHC (N=12)	MSRA (N=12)	NORA (N=12)	NORB (N=12)	RLMH (N=12)	SALA (N=12)	TET38 (N=12)	TETK (N=12)	AAC3 (N=13)	ANT9 (N=13)	APH3-PRIME (N=13)	ARLR (N=13)	ARLS (N=13)	BLAZ (N=13)	DHAP (N=13)	ERM (N=13)	FOSB (N=13)	FOSD (N=13)	FUSF (N=13)	LMRS (N=13)	LNUA (N=13)	MECA (N=13)	MECC (N=13)	MECI (N=13)	MEPA (N=13)	MEPB (N=13)	MEPR (N=13)	MGRA (N=13)	MPHC (N=13)	MSRA (N=13)	NORA (N=13)	NORB (N=13)	RLMH (N=13)	SALA (N=13)	TET38 (N=13)	TETK (N=13)	AAC3 (N=83)	ANT9 (N=83)	APH3-PRIME (N=83)	ARLR (N=83)	ARLS (N=83)	BLAZ (N=83)	DHAP (N=83)	ERM (N=83)	FOSB (N=83)	FOSD (N=83)	FUSF (N=83)	LMRS (N=83)	LNUA (N=83)	MECA (N=83)	MECC (N=83)	MECI (N=83)	MEPA (N=83)	MEPB (N=83)	MEPR (N=83)	MGRA (N=83)	MPHC (N=83)	MSRA (N=83)	NORA (N=83)	NORB (N=83)	RLMH (N=83)	SALA (N=83)	TET38 (N=83)	TETK (N=83)
presence_gene
No	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	5 (83.3%)	6 (100%)	6 (100%)	4 (66.7%)	0 (0%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	0 (0%)	6 (100%)	6 (100%)	0 (0%)	3 (100%)	0 (0%)	0 (0%)	0 (0%)	3 (100%)	0 (0%)	3 (100%)	2 (66.7%)	3 (100%)	3 (100%)	0 (0%)	3 (100%)	3 (100%)	3 (100%)	3 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	3 (100%)	3 (100%)	0 (0%)	0 (0%)	0 (0%)	3 (100%)	0 (0%)	3 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	1 (4.8%)	21 (100%)	21 (100%)	21 (100%)	1 (100%)	0 (0%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	0 (0%)	1 (100%)	0 (0%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	0 (0%)	0 (0%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	0 (0%)	2 (100%)	2 (100%)	0 (0%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	0 (0%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	0 (0%)	2 (100%)	2 (100%)	2 (100%)	1 (100%)	1 (100%)	0 (0%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	0 (0%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	19 (100%)	19 (100%)	0 (0%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	16 (84.2%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	0 (0%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	0 (0%)	19 (100%)	19 (100%)	19 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	0 (0%)	5 (100%)	1 (20.0%)	4 (80.0%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	0 (0%)	0 (0%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	0 (0%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	12 (100%)	12 (100%)	0 (0%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	13 (100%)	13 (100%)	8 (61.5%)	13 (100%)	13 (100%)	13 (100%)	13 (100%)	12 (92.3%)	13 (100%)	12 (92.3%)	13 (100%)	13 (100%)	13 (100%)	13 (100%)	13 (100%)	13 (100%)	13 (100%)	13 (100%)	13 (100%)	13 (100%)	6 (46.2%)	13 (100%)	13 (100%)	13 (100%)	13 (100%)	13 (100%)	13 (100%)	13 (100%)	80 (96.4%)	82 (98.8%)	41 (49.4%)	80 (96.4%)	80 (96.4%)	78 (94.0%)	80 (96.4%)	78 (94.0%)	81 (97.6%)	81 (97.6%)	82 (98.8%)	80 (96.4%)	77 (92.8%)	77 (92.8%)	78 (94.0%)	78 (94.0%)	80 (96.4%)	80 (96.4%)	80 (96.4%)	59 (71.1%)	69 (83.1%)	82 (98.8%)	80 (96.4%)	80 (96.4%)	39 (47.0%)	77 (92.8%)	80 (96.4%)	82 (98.8%)
Yes	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (16.7%)	0 (0%)	0 (0%)	2 (33.3%)	6 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	6 (100%)	0 (0%)	0 (0%)	3 (100%)	0 (0%)	3 (100%)	3 (100%)	3 (100%)	0 (0%)	3 (100%)	0 (0%)	1 (33.3%)	0 (0%)	0 (0%)	3 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	3 (100%)	3 (100%)	3 (100%)	3 (100%)	0 (0%)	0 (0%)	3 (100%)	3 (100%)	3 (100%)	0 (0%)	3 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	20 (95.2%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (100%)	0 (0%)	1 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (100%)	1 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (100%)	0 (0%)	0 (0%)	2 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	2 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	2 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	19 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	3 (15.8%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	19 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	19 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	5 (100%)	0 (0%)	4 (80.0%)	1 (20.0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	5 (100%)	5 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	5 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	12 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	5 (38.5%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (7.7%)	0 (0%)	1 (7.7%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	7 (53.8%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	3 (3.6%)	1 (1.2%)	42 (50.6%)	3 (3.6%)	3 (3.6%)	5 (6.0%)	3 (3.6%)	5 (6.0%)	2 (2.4%)	2 (2.4%)	1 (1.2%)	3 (3.6%)	6 (7.2%)	6 (7.2%)	5 (6.0%)	5 (6.0%)	3 (3.6%)	3 (3.6%)	3 (3.6%)	24 (28.9%)	14 (16.9%)	1 (1.2%)	3 (3.6%)	3 (3.6%)	44 (53.0%)	6 (7.2%)	3 (3.6%)	1 (1.2%)

# Aminoglycosides

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_aminoglycosides_isolate)

	MSC (N=6)	SAU (N=3)	SCH (N=21)	SCO (N=1)	SDEV (N=2)	SEQ (N=1)	SHAEM (N=19)	SPXYL (N=5)	SSUC (N=12)	SUB (N=1)	SXYL (N=13)	Overall (N=84)
presence_gene_type_isolate
0	6 (100%)	0 (0%)	21 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	5 (100%)	0 (0%)	1 (100%)	8 (61.5%)	41 (48.8%)
1	0 (0%)	3 (100%)	0 (0%)	1 (100%)	2 (100%)	1 (100%)	19 (100%)	0 (0%)	12 (100%)	0 (0%)	5 (38.5%)	43 (51.2%)

# Betalactams

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_betalactams_isolate)

	MSC (N=6)	SAU (N=3)	SCH (N=21)	SCO (N=1)	SDEV (N=2)	SEQ (N=1)	SHAEM (N=19)	SPXYL (N=5)	SSUC (N=12)	SUB (N=1)	SXYL (N=13)	Overall (N=84)
presence_gene_type_isolate
0	0 (0%)	3 (100%)	21 (100%)	1 (100%)	2 (100%)	1 (100%)	19 (100%)	0 (0%)	12 (100%)	1 (100%)	13 (100%)	73 (86.9%)
1	6 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	5 (100%)	0 (0%)	0 (0%)	0 (0%)	11 (13.1%)

# Fosfomycin

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_fosfomycin_isolate)

	MSC (N=6)	SAU (N=3)	SCH (N=21)	SCO (N=1)	SDEV (N=2)	SEQ (N=1)	SHAEM (N=19)	SPXYL (N=5)	SSUC (N=12)	SUB (N=1)	SXYL (N=13)	Overall (N=84)
presence_gene_type_isolate
0	5 (83.3%)	2 (66.7%)	21 (100%)	1 (100%)	2 (100%)	1 (100%)	19 (100%)	4 (80.0%)	12 (100%)	1 (100%)	12 (92.3%)	80 (95.2%)
1	1 (16.7%)	1 (33.3%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (20.0%)	0 (0%)	0 (0%)	1 (7.7%)	4 (4.8%)

# Fusidic acid

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_fusidic_acid_isolate)

	MSC (N=6)	SAU (N=3)	SCH (N=21)	SCO (N=1)	SDEV (N=2)	SEQ (N=1)	SHAEM (N=19)	SPXYL (N=5)	SSUC (N=12)	SUB (N=1)	SXYL (N=13)	Overall (N=84)
presence_gene_type_isolate
0	6 (100%)	3 (100%)	21 (100%)	0 (0%)	2 (100%)	1 (100%)	19 (100%)	5 (100%)	12 (100%)	1 (100%)	13 (100%)	83 (98.8%)
1	0 (0%)	0 (0%)	0 (0%)	1 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (1.2%)

# MLS

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_mls_isolate)

	MSC (N=6)	SAU (N=3)	SCH (N=21)	SCO (N=1)	SDEV (N=2)	SEQ (N=1)	SHAEM (N=19)	SPXYL (N=5)	SSUC (N=12)	SUB (N=1)	SXYL (N=13)	Overall (N=84)
presence_gene_type_isolate
0	4 (66.7%)	0 (0%)	1 (4.8%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	12 (100%)	1 (100%)	6 (46.2%)	24 (28.6%)
1	2 (33.3%)	3 (100%)	20 (95.2%)	1 (100%)	2 (100%)	1 (100%)	19 (100%)	5 (100%)	0 (0%)	0 (0%)	7 (53.8%)	60 (71.4%)

# Multicompound resistance

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_multicompound_isolate)

	MSC (N=6)	SAU (N=3)	SCH (N=21)	SCO (N=1)	SDEV (N=2)	SEQ (N=1)	SHAEM (N=19)	SPXYL (N=5)	SSUC (N=12)	SUB (N=1)	SXYL (N=13)	Overall (N=84)
presence_gene_type_isolate
0	6 (100%)	0 (0%)	21 (100%)	1 (100%)	2 (100%)	1 (100%)	19 (100%)	5 (100%)	12 (100%)	1 (100%)	13 (100%)	81 (96.4%)
1	0 (0%)	3 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	3 (3.6%)

# Multidrug resistance

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_multidrug_isolate)

	MSC (N=6)	SAU (N=3)	SCH (N=21)	SCO (N=1)	SDEV (N=2)	SEQ (N=1)	SHAEM (N=19)	SPXYL (N=5)	SSUC (N=12)	SUB (N=1)	SXYL (N=13)	Overall (N=84)
presence_gene_type_isolate
0	0 (0%)	0 (0%)	21 (100%)	1 (100%)	0 (0%)	1 (100%)	0 (0%)	5 (100%)	12 (100%)	1 (100%)	13 (100%)	54 (64.3%)
1	6 (100%)	3 (100%)	0 (0%)	0 (0%)	2 (100%)	0 (0%)	19 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	30 (35.7%)

# Phenicol

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_phenicol_isolate)

	MSC (N=6)	SAU (N=3)	SCH (N=21)	SCO (N=1)	SDEV (N=2)	SEQ (N=1)	SHAEM (N=19)	SPXYL (N=5)	SSUC (N=12)	SUB (N=1)	SXYL (N=13)	Overall (N=84)
presence_gene_type_isolate
0	6 (100%)	0 (0%)	21 (100%)	1 (100%)	2 (100%)	1 (100%)	19 (100%)	5 (100%)	12 (100%)	1 (100%)	13 (100%)	81 (96.4%)
1	0 (0%)	3 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	3 (3.6%)

# Tetracylines

table1(~presence_gene_type_isolate|wgs_species_3,data=amr_tetracyclines_isolate)

	MSC (N=6)	SAU (N=3)	SCH (N=21)	SCO (N=1)	SDEV (N=2)	SEQ (N=1)	SHAEM (N=19)	SPXYL (N=5)	SSUC (N=12)	SUB (N=1)	SXYL (N=13)	Overall (N=84)
presence_gene_type_isolate
0	6 (100%)	0 (0%)	21 (100%)	0 (0%)	2 (100%)	1 (100%)	19 (100%)	5 (100%)	12 (100%)	1 (100%)	13 (100%)	80 (95.2%)
1	0 (0%)	3 (100%)	0 (0%)	1 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	4 (4.8%)

# --- Plasmids ----

plasmid_summary <- plasmid_long %>%
  group_by(wgs_isolate_id,wgs_species_3) %>%
  summarise(n_plasmids=as.factor(sum(presence)),presence_plasmid=as.factor(ifelse(sum(presence)>0,1,0)))

## `summarise()` has grouped output by 'wgs_isolate_id'. You can override using
## the `.groups` argument.

table1::table1(~n_plasmids + presence_plasmid|wgs_species_3, data= plasmid_summary)

	MSC (N=6)	SAU (N=3)	SCH (N=21)	SCO (N=1)	SDEV (N=2)	SEQ (N=1)	SHAEM (N=19)	SPXYL (N=5)	SSUC (N=12)	SXYL (N=13)	Overall (N=83)
n_plasmids
0	3 (50.0%)	0 (0%)	20 (95.2%)	0 (0%)	0 (0%)	0 (0%)	9 (47.4%)	0 (0%)	6 (50.0%)	0 (0%)	38 (45.8%)
1	2 (33.3%)	2 (66.7%)	1 (4.8%)	0 (0%)	1 (50.0%)	0 (0%)	5 (26.3%)	0 (0%)	4 (33.3%)	6 (46.2%)	21 (25.3%)
2	1 (16.7%)	0 (0%)	0 (0%)	0 (0%)	1 (50.0%)	1 (100%)	4 (21.1%)	5 (100%)	2 (16.7%)	6 (46.2%)	20 (24.1%)
3	0 (0%)	1 (33.3%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (5.3%)	0 (0%)	0 (0%)	1 (7.7%)	3 (3.6%)
6	0 (0%)	0 (0%)	0 (0%)	1 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (1.2%)
presence_plasmid
0	3 (50.0%)	0 (0%)	20 (95.2%)	0 (0%)	0 (0%)	0 (0%)	9 (47.4%)	0 (0%)	6 (50.0%)	0 (0%)	38 (45.8%)
1	3 (50.0%)	3 (100%)	1 (4.8%)	1 (100%)	2 (100%)	1 (100%)	10 (52.6%)	5 (100%)	6 (50.0%)	13 (100%)	45 (54.2%)

table1::table1(~presence_gene|gene_abbreviation, data= plasmid_long)

## Warning in .table1.internal(x = x, labels = labels, groupspan = groupspan, :
## Table has 30 columns. Are you sure this is what you want?

	Cassette (N=83)	pETB (N=83)	pKH13 (N=83)	pKH21 (N=83)	pLNU1 (N=83)	pLNU3 (N=83)	pLNU9 (N=83)	pLUG10 (N=83)	pN315 (N=83)	pRJ9 (N=83)	pS0385 (N=83)	pSaa6159 (N=83)	pSAS (N=83)	pSE12228p05 (N=83)	pSHaeA (N=83)	pSJH101 (N=83)	pSK1 (N=83)	pSK41 (N=83)	pSSAP2 (N=83)	pSSP1 (N=83)	pSSP2 (N=83)	pTW20 (N=83)	pvSw4 (N=83)	SAP016A (N=83)	SAP019A (N=83)	SAP099B (N=166)	SAP105A (N=83)	SAP108C (N=83)	VRSAp (N=83)	Overall (N=2490)
presence_gene
No	82 (98.8%)	82 (98.8%)	82 (98.8%)	82 (98.8%)	82 (98.8%)	82 (98.8%)	78 (94.0%)	82 (98.8%)	82 (98.8%)	81 (97.6%)	82 (98.8%)	79 (95.2%)	82 (98.8%)	81 (97.6%)	82 (98.8%)	74 (89.2%)	82 (98.8%)	82 (98.8%)	73 (88.0%)	82 (98.8%)	80 (96.4%)	81 (97.6%)	74 (89.2%)	77 (92.8%)	82 (98.8%)	164 (98.8%)	79 (95.2%)	81 (97.6%)	82 (98.8%)	2414 (96.9%)
Yes	1 (1.2%)	1 (1.2%)	1 (1.2%)	1 (1.2%)	1 (1.2%)	1 (1.2%)	5 (6.0%)	1 (1.2%)	1 (1.2%)	2 (2.4%)	1 (1.2%)	4 (4.8%)	1 (1.2%)	2 (2.4%)	1 (1.2%)	9 (10.8%)	1 (1.2%)	1 (1.2%)	10 (12.0%)	1 (1.2%)	3 (3.6%)	2 (2.4%)	9 (10.8%)	6 (7.2%)	1 (1.2%)	2 (1.2%)	4 (4.8%)	2 (2.4%)	1 (1.2%)	76 (3.1%)

table1::table1(~presence_gene|wgs_species_3:gene_abbreviation, data= plasmid_long)

## Warning in .table1.internal(x = x, labels = labels, groupspan = groupspan, :
## Table has 319 columns. Are you sure this is what you want?

	MSC																													SAU																													SCH																													SCO																													SDEV																													SEQ																													SHAEM																													SPXYL																													SSUC																													SXYL																													Overall
	Cassette (N=6)	pETB (N=6)	pKH13 (N=6)	pKH21 (N=6)	pLNU1 (N=6)	pLNU3 (N=6)	pLNU9 (N=6)	pLUG10 (N=6)	pN315 (N=6)	pRJ9 (N=6)	pS0385 (N=6)	pSaa6159 (N=6)	pSAS (N=6)	pSE12228p05 (N=6)	pSHaeA (N=6)	pSJH101 (N=6)	pSK1 (N=6)	pSK41 (N=6)	pSSAP2 (N=6)	pSSP1 (N=6)	pSSP2 (N=6)	pTW20 (N=6)	pvSw4 (N=6)	SAP016A (N=6)	SAP019A (N=6)	SAP099B (N=12)	SAP105A (N=6)	SAP108C (N=6)	VRSAp (N=6)	Cassette (N=3)	pETB (N=3)	pKH13 (N=3)	pKH21 (N=3)	pLNU1 (N=3)	pLNU3 (N=3)	pLNU9 (N=3)	pLUG10 (N=3)	pN315 (N=3)	pRJ9 (N=3)	pS0385 (N=3)	pSaa6159 (N=3)	pSAS (N=3)	pSE12228p05 (N=3)	pSHaeA (N=3)	pSJH101 (N=3)	pSK1 (N=3)	pSK41 (N=3)	pSSAP2 (N=3)	pSSP1 (N=3)	pSSP2 (N=3)	pTW20 (N=3)	pvSw4 (N=3)	SAP016A (N=3)	SAP019A (N=3)	SAP099B (N=6)	SAP105A (N=3)	SAP108C (N=3)	VRSAp (N=3)	Cassette (N=21)	pETB (N=21)	pKH13 (N=21)	pKH21 (N=21)	pLNU1 (N=21)	pLNU3 (N=21)	pLNU9 (N=21)	pLUG10 (N=21)	pN315 (N=21)	pRJ9 (N=21)	pS0385 (N=21)	pSaa6159 (N=21)	pSAS (N=21)	pSE12228p05 (N=21)	pSHaeA (N=21)	pSJH101 (N=21)	pSK1 (N=21)	pSK41 (N=21)	pSSAP2 (N=21)	pSSP1 (N=21)	pSSP2 (N=21)	pTW20 (N=21)	pvSw4 (N=21)	SAP016A (N=21)	SAP019A (N=21)	SAP099B (N=42)	SAP105A (N=21)	SAP108C (N=21)	VRSAp (N=21)	Cassette (N=1)	pETB (N=1)	pKH13 (N=1)	pKH21 (N=1)	pLNU1 (N=1)	pLNU3 (N=1)	pLNU9 (N=1)	pLUG10 (N=1)	pN315 (N=1)	pRJ9 (N=1)	pS0385 (N=1)	pSaa6159 (N=1)	pSAS (N=1)	pSE12228p05 (N=1)	pSHaeA (N=1)	pSJH101 (N=1)	pSK1 (N=1)	pSK41 (N=1)	pSSAP2 (N=1)	pSSP1 (N=1)	pSSP2 (N=1)	pTW20 (N=1)	pvSw4 (N=1)	SAP016A (N=1)	SAP019A (N=1)	SAP099B (N=2)	SAP105A (N=1)	SAP108C (N=1)	VRSAp (N=1)	Cassette (N=2)	pETB (N=2)	pKH13 (N=2)	pKH21 (N=2)	pLNU1 (N=2)	pLNU3 (N=2)	pLNU9 (N=2)	pLUG10 (N=2)	pN315 (N=2)	pRJ9 (N=2)	pS0385 (N=2)	pSaa6159 (N=2)	pSAS (N=2)	pSE12228p05 (N=2)	pSHaeA (N=2)	pSJH101 (N=2)	pSK1 (N=2)	pSK41 (N=2)	pSSAP2 (N=2)	pSSP1 (N=2)	pSSP2 (N=2)	pTW20 (N=2)	pvSw4 (N=2)	SAP016A (N=2)	SAP019A (N=2)	SAP099B (N=4)	SAP105A (N=2)	SAP108C (N=2)	VRSAp (N=2)	Cassette (N=1)	pETB (N=1)	pKH13 (N=1)	pKH21 (N=1)	pLNU1 (N=1)	pLNU3 (N=1)	pLNU9 (N=1)	pLUG10 (N=1)	pN315 (N=1)	pRJ9 (N=1)	pS0385 (N=1)	pSaa6159 (N=1)	pSAS (N=1)	pSE12228p05 (N=1)	pSHaeA (N=1)	pSJH101 (N=1)	pSK1 (N=1)	pSK41 (N=1)	pSSAP2 (N=1)	pSSP1 (N=1)	pSSP2 (N=1)	pTW20 (N=1)	pvSw4 (N=1)	SAP016A (N=1)	SAP019A (N=1)	SAP099B (N=2)	SAP105A (N=1)	SAP108C (N=1)	VRSAp (N=1)	Cassette (N=19)	pETB (N=19)	pKH13 (N=19)	pKH21 (N=19)	pLNU1 (N=19)	pLNU3 (N=19)	pLNU9 (N=19)	pLUG10 (N=19)	pN315 (N=19)	pRJ9 (N=19)	pS0385 (N=19)	pSaa6159 (N=19)	pSAS (N=19)	pSE12228p05 (N=19)	pSHaeA (N=19)	pSJH101 (N=19)	pSK1 (N=19)	pSK41 (N=19)	pSSAP2 (N=19)	pSSP1 (N=19)	pSSP2 (N=19)	pTW20 (N=19)	pvSw4 (N=19)	SAP016A (N=19)	SAP019A (N=19)	SAP099B (N=38)	SAP105A (N=19)	SAP108C (N=19)	VRSAp (N=19)	Cassette (N=5)	pETB (N=5)	pKH13 (N=5)	pKH21 (N=5)	pLNU1 (N=5)	pLNU3 (N=5)	pLNU9 (N=5)	pLUG10 (N=5)	pN315 (N=5)	pRJ9 (N=5)	pS0385 (N=5)	pSaa6159 (N=5)	pSAS (N=5)	pSE12228p05 (N=5)	pSHaeA (N=5)	pSJH101 (N=5)	pSK1 (N=5)	pSK41 (N=5)	pSSAP2 (N=5)	pSSP1 (N=5)	pSSP2 (N=5)	pTW20 (N=5)	pvSw4 (N=5)	SAP016A (N=5)	SAP019A (N=5)	SAP099B (N=10)	SAP105A (N=5)	SAP108C (N=5)	VRSAp (N=5)	Cassette (N=12)	pETB (N=12)	pKH13 (N=12)	pKH21 (N=12)	pLNU1 (N=12)	pLNU3 (N=12)	pLNU9 (N=12)	pLUG10 (N=12)	pN315 (N=12)	pRJ9 (N=12)	pS0385 (N=12)	pSaa6159 (N=12)	pSAS (N=12)	pSE12228p05 (N=12)	pSHaeA (N=12)	pSJH101 (N=12)	pSK1 (N=12)	pSK41 (N=12)	pSSAP2 (N=12)	pSSP1 (N=12)	pSSP2 (N=12)	pTW20 (N=12)	pvSw4 (N=12)	SAP016A (N=12)	SAP019A (N=12)	SAP099B (N=24)	SAP105A (N=12)	SAP108C (N=12)	VRSAp (N=12)	Cassette (N=13)	pETB (N=13)	pKH13 (N=13)	pKH21 (N=13)	pLNU1 (N=13)	pLNU3 (N=13)	pLNU9 (N=13)	pLUG10 (N=13)	pN315 (N=13)	pRJ9 (N=13)	pS0385 (N=13)	pSaa6159 (N=13)	pSAS (N=13)	pSE12228p05 (N=13)	pSHaeA (N=13)	pSJH101 (N=13)	pSK1 (N=13)	pSK41 (N=13)	pSSAP2 (N=13)	pSSP1 (N=13)	pSSP2 (N=13)	pTW20 (N=13)	pvSw4 (N=13)	SAP016A (N=13)	SAP019A (N=13)	SAP099B (N=26)	SAP105A (N=13)	SAP108C (N=13)	VRSAp (N=13)	Cassette (N=83)	pETB (N=83)	pKH13 (N=83)	pKH21 (N=83)	pLNU1 (N=83)	pLNU3 (N=83)	pLNU9 (N=83)	pLUG10 (N=83)	pN315 (N=83)	pRJ9 (N=83)	pS0385 (N=83)	pSaa6159 (N=83)	pSAS (N=83)	pSE12228p05 (N=83)	pSHaeA (N=83)	pSJH101 (N=83)	pSK1 (N=83)	pSK41 (N=83)	pSSAP2 (N=83)	pSSP1 (N=83)	pSSP2 (N=83)	pTW20 (N=83)	pvSw4 (N=83)	SAP016A (N=83)	SAP019A (N=83)	SAP099B (N=166)	SAP105A (N=83)	SAP108C (N=83)	VRSAp (N=83)
presence_gene
No	6 (100%)	6 (100%)	5 (83.3%)	6 (100%)	5 (83.3%)	6 (100%)	5 (83.3%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	5 (83.3%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	6 (100%)	12 (100%)	6 (100%)	6 (100%)	6 (100%)	3 (100%)	3 (100%)	3 (100%)	3 (100%)	3 (100%)	3 (100%)	3 (100%)	3 (100%)	2 (66.7%)	1 (33.3%)	3 (100%)	3 (100%)	3 (100%)	3 (100%)	3 (100%)	2 (66.7%)	3 (100%)	3 (100%)	3 (100%)	3 (100%)	3 (100%)	3 (100%)	3 (100%)	3 (100%)	2 (66.7%)	6 (100%)	3 (100%)	3 (100%)	3 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	20 (95.2%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	21 (100%)	42 (100%)	21 (100%)	21 (100%)	21 (100%)	0 (0%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	0 (0%)	0 (0%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	0 (0%)	1 (100%)	1 (100%)	0 (0%)	1 (100%)	1 (100%)	0 (0%)	1 (100%)	1 (100%)	2 (100%)	1 (100%)	1 (100%)	1 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	1 (50.0%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	2 (100%)	4 (100%)	1 (50.0%)	2 (100%)	1 (50.0%)	1 (100%)	1 (100%)	1 (100%)	0 (0%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	0 (0%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	1 (100%)	2 (100%)	1 (100%)	1 (100%)	1 (100%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	16 (84.2%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	18 (94.7%)	18 (94.7%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	19 (100%)	13 (68.4%)	19 (100%)	36 (94.7%)	16 (84.2%)	19 (100%)	19 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	5 (100%)	4 (80.0%)	5 (100%)	5 (100%)	1 (20.0%)	5 (100%)	5 (100%)	5 (100%)	0 (0%)	5 (100%)	5 (100%)	10 (100%)	5 (100%)	5 (100%)	5 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	11 (91.7%)	8 (66.7%)	12 (100%)	12 (100%)	12 (100%)	11 (91.7%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	12 (100%)	10 (83.3%)	12 (100%)	12 (100%)	24 (100%)	12 (100%)	12 (100%)	12 (100%)	13 (100%)	12 (92.3%)	13 (100%)	13 (100%)	13 (100%)	12 (92.3%)	13 (100%)	13 (100%)	13 (100%)	13 (100%)	13 (100%)	13 (100%)	12 (92.3%)	13 (100%)	13 (100%)	9 (69.2%)	13 (100%)	13 (100%)	7 (53.8%)	13 (100%)	10 (76.9%)	11 (84.6%)	12 (92.3%)	13 (100%)	13 (100%)	26 (100%)	13 (100%)	11 (84.6%)	13 (100%)	82 (98.8%)	82 (98.8%)	82 (98.8%)	82 (98.8%)	82 (98.8%)	82 (98.8%)	78 (94.0%)	82 (98.8%)	82 (98.8%)	81 (97.6%)	82 (98.8%)	79 (95.2%)	82 (98.8%)	81 (97.6%)	82 (98.8%)	74 (89.2%)	82 (98.8%)	82 (98.8%)	73 (88.0%)	82 (98.8%)	80 (96.4%)	81 (97.6%)	74 (89.2%)	77 (92.8%)	82 (98.8%)	164 (98.8%)	79 (95.2%)	81 (97.6%)	82 (98.8%)
Yes	0 (0%)	0 (0%)	1 (16.7%)	0 (0%)	1 (16.7%)	0 (0%)	1 (16.7%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (16.7%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (33.3%)	2 (66.7%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (33.3%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (33.3%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (4.8%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (100%)	1 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (100%)	0 (0%)	0 (0%)	1 (100%)	0 (0%)	0 (0%)	1 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (50.0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (50.0%)	0 (0%)	1 (50.0%)	0 (0%)	0 (0%)	0 (0%)	1 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	3 (15.8%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (5.3%)	1 (5.3%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	6 (31.6%)	0 (0%)	2 (5.3%)	3 (15.8%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (20.0%)	0 (0%)	0 (0%)	4 (80.0%)	0 (0%)	0 (0%)	0 (0%)	5 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (8.3%)	4 (33.3%)	0 (0%)	0 (0%)	0 (0%)	1 (8.3%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	2 (16.7%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (7.7%)	0 (0%)	0 (0%)	0 (0%)	1 (7.7%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	1 (7.7%)	0 (0%)	0 (0%)	4 (30.8%)	0 (0%)	0 (0%)	6 (46.2%)	0 (0%)	3 (23.1%)	2 (15.4%)	1 (7.7%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	2 (15.4%)	0 (0%)	1 (1.2%)	1 (1.2%)	1 (1.2%)	1 (1.2%)	1 (1.2%)	1 (1.2%)	5 (6.0%)	1 (1.2%)	1 (1.2%)	2 (2.4%)	1 (1.2%)	4 (4.8%)	1 (1.2%)	2 (2.4%)	1 (1.2%)	9 (10.8%)	1 (1.2%)	1 (1.2%)	10 (12.0%)	1 (1.2%)	3 (3.6%)	2 (2.4%)	9 (10.8%)	6 (7.2%)	1 (1.2%)	2 (1.2%)	4 (4.8%)	2 (2.4%)	1 (1.2%)

# Chromogenes 

# No plasmids = or > isolates

# Haemolyticus

model_1_plasmids_sau <- lm(mic_sau ~ as.factor(`rep13_7_rep(pLNU9)`) ,data=mb_haemolyticus_plasmids)
summary(model_1_plasmids_sau)

## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep13_7_rep(pLNU9)`), data = mb_haemolyticus_plasmids)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.77250 -0.07458  0.10750  0.41250  0.72750 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        3.9325     0.1928  20.400 2.96e-11 ***
## as.factor(`rep13_7_rep(pLNU9)`)1   0.2042     0.4310   0.474    0.644    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6678 on 13 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.01697,    Adjusted R-squared:  -0.05865 
## F-statistic: 0.2244 on 1 and 13 DF,  p-value: 0.6436

model_1_plasmids_sub <- lm(mic_sub ~ as.factor(`rep13_7_rep(pLNU9)`),data=mb_haemolyticus_plasmids)
summary(model_1_plasmids_sub)

## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep13_7_rep(pLNU9)`), data = mb_haemolyticus_plasmids)
## 
## Residuals:
##          1          6         10         11         13         15 
## -1.360e-01  1.388e-17 -1.416e+00  6.940e-01  3.940e-01  4.640e-01 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        3.5760     0.3792   9.431 0.000705 ***
## as.factor(`rep13_7_rep(pLNU9)`)1  -0.3960     0.9288  -0.426 0.691799    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8479 on 4 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.04347,    Adjusted R-squared:  -0.1957 
## F-statistic: 0.1818 on 1 and 4 DF,  p-value: 0.6918

model_2_plasmids_sau <- lm(mic_sau ~ as.factor(`rep39_2_repA(SAP016A)`),data=mb_haemolyticus_plasmids)
summary(model_2_plasmids_sau)

## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep39_2_repA(SAP016A)`), data = mb_haemolyticus_plasmids)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.59778 -0.14722  0.07222  0.32278  0.80222 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           3.7578     0.2036  18.454 1.05e-10 ***
## as.factor(`rep39_2_repA(SAP016A)`)1   0.5389     0.3220   1.674    0.118    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6109 on 13 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.1773, Adjusted R-squared:  0.114 
## F-statistic: 2.801 on 1 and 13 DF,  p-value: 0.1181

model_2_plasmids_sub <- lm(mic_sub ~ as.factor(`rep39_2_repA(SAP016A)`),data=mb_haemolyticus_plasmids)
summary(model_2_plasmids_sub)

## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep39_2_repA(SAP016A)`), data = mb_haemolyticus_plasmids)
## 
## Residuals:
##     1     6    10    11    13    15 
## -0.19 -0.45 -1.23  0.64  0.58  0.65 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                           3.3900     0.4933   6.872  0.00235 **
## as.factor(`rep39_2_repA(SAP016A)`)1   0.2400     0.6976   0.344  0.74815   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8544 on 4 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.02874,    Adjusted R-squared:  -0.2141 
## F-statistic: 0.1184 on 1 and 4 DF,  p-value: 0.7481

model_4_plasmids_sau <- lm(mic_sau ~ as.factor(`rep19b_1_repA(SAP105A)`),data=mb_haemolyticus_plasmids)
summary(model_4_plasmids_sau)

## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep19b_1_repA(SAP105A)`), data = mb_haemolyticus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7517 -0.1308  0.1783  0.3133  0.7483 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            3.9117     0.1906  20.520 2.75e-11 ***
## as.factor(`rep19b_1_repA(SAP105A)`)1   0.3083     0.4263   0.723    0.482    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6603 on 13 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.03869,    Adjusted R-squared:  -0.03525 
## F-statistic: 0.5232 on 1 and 13 DF,  p-value: 0.4823

model_4_plasmids_sub <- lm(mic_sub ~ as.factor(`rep19b_1_repA(SAP105A)`),data=mb_haemolyticus_plasmids)
summary(model_4_plasmids_sub)

## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep19b_1_repA(SAP105A)`), data = mb_haemolyticus_plasmids)
## 
## Residuals:
##       1       6      10      11      13      15 
## -0.1133 -0.3733 -1.3067  0.8033  0.5033  0.4867 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                           3.46667    0.49959   6.939  0.00227 **
## as.factor(`rep19b_1_repA(SAP105A)`)1  0.08667    0.70653   0.123  0.90829   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8653 on 4 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.003748,   Adjusted R-squared:  -0.2453 
## F-statistic: 0.01505 on 1 and 4 DF,  p-value: 0.9083

# Succinus (psaa6159 associated)

model_5_plasmids_sau <- lm(mic_sau ~ as.factor(`rep16_3_rep(pSaa6159)`) ,data=mb_succinus_plasmids)
summary(model_5_plasmids_sau)

## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep16_3_rep(pSaa6159)`), data = mb_succinus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2325 -0.1462 -0.0300  0.3137  0.9575 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           3.1525     0.2120  14.870 1.22e-07 ***
## as.factor(`rep16_3_rep(pSaa6159)`)1  -0.8425     0.4060  -2.075   0.0678 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5996 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.3237, Adjusted R-squared:  0.2485 
## F-statistic: 4.307 on 1 and 9 DF,  p-value: 0.06778

Confint(model_5_plasmids_sau)

##                                     Estimate     2.5 %     97.5 %
## (Intercept)                           3.1525  2.672909 3.63209080
## as.factor(`rep16_3_rep(pSaa6159)`)1  -0.8425 -1.760846 0.07584647

model_5_plasmids_sub <- lm(mic_sub ~ as.factor(`rep16_3_rep(pSaa6159)`),data=mb_succinus_plasmids)
summary(model_5_plasmids_sub)

## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep16_3_rep(pSaa6159)`), data = mb_succinus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5025 -0.3713 -0.0300  0.7025  0.9875 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           3.1525     0.3144   10.03  3.5e-06 ***
## as.factor(`rep16_3_rep(pSaa6159)`)1  -0.8425     0.6020   -1.40    0.195    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8892 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1787, Adjusted R-squared:  0.08748 
## F-statistic: 1.959 on 1 and 9 DF,  p-value: 0.1952

Confint(model_5_plasmids_sub)

##                                     Estimate     2.5 %    97.5 %
## (Intercept)                           3.1525  2.441321 3.8636792
## as.factor(`rep16_3_rep(pSaa6159)`)1  -0.8425 -2.204305 0.5193046

model_6_plasmids_sau <- lm(mic_sau ~ as.factor(`rep19c_2_rep(pvSw4)`) ,data=mb_succinus_plasmids)
summary(model_6_plasmids_sau)

## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep19c_2_rep(pvSw4)`), data = mb_succinus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8456 -0.3706 -0.0500  0.3044  1.3444 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         2.7656     0.2097  13.188 3.43e-07 ***
## as.factor(`rep19c_2_rep(pvSw4)`)1   0.8644     0.4918   1.758    0.113    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6291 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2556, Adjusted R-squared:  0.1728 
## F-statistic:  3.09 on 1 and 9 DF,  p-value: 0.1127

Confint(model_6_plasmids_sau)

##                                    Estimate      2.5 %   97.5 %
## (Intercept)                       2.7655556  2.2911731 3.239938
## as.factor(`rep19c_2_rep(pvSw4)`)1 0.8644444 -0.2480809 1.976970

model_6_plasmids_sub <- lm(mic_sub ~ as.factor(`rep19c_2_rep(pvSw4)`),data=mb_succinus_plasmids)
summary(model_6_plasmids_sub)

## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep19c_2_rep(pvSw4)`), data = mb_succinus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2267 -0.6667 -0.3667  0.8517  1.2633 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         2.8767     0.3251   8.849  9.8e-06 ***
## as.factor(`rep19c_2_rep(pvSw4)`)1   0.2533     0.7624   0.332    0.747    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9752 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.01212,    Adjusted R-squared:  -0.09764 
## F-statistic: 0.1104 on 1 and 9 DF,  p-value: 0.7473

Confint(model_6_plasmids_sub)

##                                    Estimate     2.5 %   97.5 %
## (Intercept)                       2.8766667  2.141286 3.612047
## as.factor(`rep19c_2_rep(pvSw4)`)1 0.2533333 -1.471287 1.977953

# Xylosus & pseudoxylosus (pSSP2 associated)

model_7_plasmids_sau <- lm(mic_sau ~ as.factor(`rep16_2_CDS6(pSJH101)`) ,data=mb_xylosus_plasmids)
summary(model_7_plasmids_sau)

## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep16_2_CDS6(pSJH101)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3800 -0.2825 -0.1640  0.0900  1.0100 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           3.0800     0.1186  25.971 1.65e-14 ***
## as.factor(`rep16_2_CDS6(pSJH101)`)1  -0.0920     0.2250  -0.409    0.688    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4276 on 16 degrees of freedom
## Multiple R-squared:  0.01034,    Adjusted R-squared:  -0.05151 
## F-statistic: 0.1672 on 1 and 16 DF,  p-value: 0.6881

Confint(model_7_plasmids_sau)

##                                     Estimate      2.5 %    97.5 %
## (Intercept)                            3.080  2.8285895 3.3314105
## as.factor(`rep16_2_CDS6(pSJH101)`)1   -0.092 -0.5690178 0.3850178

model_7_plasmids_sub <- lm(mic_sub ~ as.factor(`rep16_2_CDS6(pSJH101)`),data=mb_xylosus_plasmids)
summary(model_7_plasmids_sub)

## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep16_2_CDS6(pSJH101)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3700 -0.3685 -0.1100  0.5680  1.0100 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           3.0800     0.1822  16.904 1.26e-11 ***
## as.factor(`rep16_2_CDS6(pSJH101)`)1  -0.0920     0.3457  -0.266    0.794    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.657 on 16 degrees of freedom
## Multiple R-squared:  0.004407,   Adjusted R-squared:  -0.05782 
## F-statistic: 0.07082 on 1 and 16 DF,  p-value: 0.7935

Confint(model_7_plasmids_sub)

##                                     Estimate      2.5 %    97.5 %
## (Intercept)                            3.080  2.6937382 3.4662618
## as.factor(`rep16_2_CDS6(pSJH101)`)1   -0.092 -0.8248802 0.6408802

model_8_plasmids_sau <- lm(mic_sau ~ as.factor(`rep20_14_repA(pSSAP2)`) ,data=mb_xylosus_plasmids)
summary(model_8_plasmids_sau)

## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep20_14_repA(pSSAP2)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4350 -0.2297 -0.1487  0.1634  0.9550 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           2.9538     0.1482  19.935 1.01e-12 ***
## as.factor(`rep20_14_repA(pSSAP2)`)1   0.1812     0.1988   0.912    0.375    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4191 on 16 degrees of freedom
## Multiple R-squared:  0.04939,    Adjusted R-squared:  -0.01002 
## F-statistic: 0.8313 on 1 and 16 DF,  p-value: 0.3754

Confint(model_8_plasmids_sau)

##                                     Estimate     2.5 %   97.5 %
## (Intercept)                          2.95375  2.639650 3.267850
## as.factor(`rep20_14_repA(pSSAP2)`)1  0.18125 -0.240159 0.602659

model_8_plasmids_sub <- lm(mic_sub ~ as.factor(`rep20_14_repA(pSSAP2)`),data=mb_xylosus_plasmids)
summary(model_8_plasmids_sub)

## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep20_14_repA(pSSAP2)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3250 -0.3378 -0.0650  0.5487  1.0550 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                          3.07875    0.23264   13.23 4.92e-10 ***
## as.factor(`rep20_14_repA(pSSAP2)`)1 -0.04375    0.31212   -0.14     0.89    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.658 on 16 degrees of freedom
## Multiple R-squared:  0.001226,   Adjusted R-squared:  -0.0612 
## F-statistic: 0.01965 on 1 and 16 DF,  p-value: 0.8903

Confint(model_8_plasmids_sub)

##                                     Estimate      2.5 %    97.5 %
## (Intercept)                          3.07875  2.5855752 3.5719248
## as.factor(`rep20_14_repA(pSSAP2)`)1 -0.04375 -0.7054135 0.6179135

model_8_plasmids_sau <- lm(mic_sau ~ as.factor(`rep19c_4_rep(pSSP2)`) ,data=mb_xylosus_plasmids)
summary(model_8_plasmids_sau)

## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep19c_4_rep(pSSP2)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48067 -0.27067 -0.14700  0.07683  0.99933 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         3.0907     0.1087  28.422 4.01e-15 ***
## as.factor(`rep19c_4_rep(pSSP2)`)1  -0.2173     0.2664  -0.816    0.427    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4212 on 16 degrees of freedom
## Multiple R-squared:  0.03995,    Adjusted R-squared:  -0.02006 
## F-statistic: 0.6657 on 1 and 16 DF,  p-value: 0.4265

Confint(model_8_plasmids_sau)

##                                     Estimate      2.5 %  97.5 %
## (Intercept)                        3.0906667  2.8601438 3.32119
## as.factor(`rep19c_4_rep(pSSP2)`)1 -0.2173333 -0.7819967 0.34733

model_8_plasmids_sub <- lm(mic_sub ~as.factor(`rep19c_4_rep(pSSP2)`),data=mb_xylosus_plasmids)
summary(model_8_plasmids_sub)

## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep19c_4_rep(pSSP2)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6640 -0.3915 -0.1140  0.5503  0.8660 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         3.2240     0.1346  23.951 5.84e-14 ***
## as.factor(`rep19c_4_rep(pSSP2)`)1  -1.0173     0.3297  -3.085  0.00709 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5213 on 16 degrees of freedom
## Multiple R-squared:  0.373,  Adjusted R-squared:  0.3339 
## F-statistic:  9.52 on 1 and 16 DF,  p-value: 0.007093

Confint(model_8_plasmids_sub)

##                                    Estimate     2.5 %     97.5 %
## (Intercept)                        3.224000  2.938644  3.5093559
## as.factor(`rep19c_4_rep(pSSP2)`)1 -1.017333 -1.716310 -0.3183569

model_9_plasmids_sau <- lm(mic_sau ~ as.factor(`rep20_3_rep(pTW20)`) ,data=mb_xylosus_plasmids)
summary(model_9_plasmids_sau)

## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep20_3_rep(pTW20)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4300 -0.2850 -0.1250  0.1275  1.0500 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        3.0400     0.1069  28.435 3.98e-15 ***
## as.factor(`rep20_3_rep(pTW20)`)1   0.1300     0.3207   0.405    0.691    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4276 on 16 degrees of freedom
## Multiple R-squared:  0.01016,    Adjusted R-squared:  -0.0517 
## F-statistic: 0.1643 on 1 and 16 DF,  p-value: 0.6906

Confint(model_9_plasmids_sau)

##                                  Estimate      2.5 %    97.5 %
## (Intercept)                          3.04  2.8133615 3.2666385
## as.factor(`rep20_3_rep(pTW20)`)1     0.13 -0.5499154 0.8099154

model_9_plasmids_sub <- lm(mic_sub ~as.factor(`rep20_3_rep(pTW20)`),data=mb_xylosus_plasmids)
summary(model_9_plasmids_sub)

## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep20_3_rep(pTW20)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -1.33  -0.33  -0.07   0.49   1.05 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        3.0400     0.1642  18.509 3.15e-12 ***
## as.factor(`rep20_3_rep(pTW20)`)1   0.1300     0.4927   0.264    0.795    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.657 on 16 degrees of freedom
## Multiple R-squared:  0.004332,   Adjusted R-squared:  -0.0579 
## F-statistic: 0.06961 on 1 and 16 DF,  p-value: 0.7953

Confint(model_9_plasmids_sub)

##                                  Estimate      2.5 %   97.5 %
## (Intercept)                          3.04  2.6918152 3.388185
## as.factor(`rep20_3_rep(pTW20)`)1     0.13 -0.9145543 1.174554

model_10_plasmids_sau <- lm(mic_sau ~ as.factor(`rep19c_2_rep(pvSw4)`) ,data=mb_xylosus_plasmids)
summary(model_10_plasmids_sau)

## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep19c_2_rep(pvSw4)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4700 -0.2567 -0.1267  0.1113  1.0483 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        3.04167    0.12396  24.538    4e-14 ***
## as.factor(`rep19c_2_rep(pvSw4)`)1  0.03833    0.21470   0.179    0.861    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4294 on 16 degrees of freedom
## Multiple R-squared:  0.001988,   Adjusted R-squared:  -0.06039 
## F-statistic: 0.03188 on 1 and 16 DF,  p-value: 0.8605

Confint(model_10_plasmids_sau)

##                                     Estimate      2.5 %    97.5 %
## (Intercept)                       3.04166667  2.7788886 3.3044447
## as.factor(`rep19c_2_rep(pvSw4)`)1 0.03833333 -0.4168116 0.4934783

model_10_plasmids_sub <- lm(mic_sub ~as.factor(`rep19c_2_rep(pvSw4)`),data=mb_xylosus_plasmids)
summary(model_10_plasmids_sub)

## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep19c_2_rep(pvSw4)`), data = mb_xylosus_plasmids)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.33167 -0.36042 -0.09083  0.58333  1.04833 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        3.04167    0.18999  16.010 2.86e-11 ***
## as.factor(`rep19c_2_rep(pvSw4)`)1  0.03833    0.32907   0.116    0.909    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6581 on 16 degrees of freedom
## Multiple R-squared:  0.0008474,  Adjusted R-squared:  -0.0616 
## F-statistic: 0.01357 on 1 and 16 DF,  p-value: 0.9087

Confint(model_10_plasmids_sub)

##                                     Estimate      2.5 %    97.5 %
## (Intercept)                       3.04166667  2.6389147 3.4444186
## as.factor(`rep19c_2_rep(pvSw4)`)1 0.03833333 -0.6592536 0.7359202

model_11_plasmids_sau <- lm(mic_sau ~ as.factor(`rep7a_6_SAP108C001(SAP108C)`) ,data=mb_xylosus_plasmids)
summary(model_11_plasmids_sau)

## 
## Call:
## lm(formula = mic_sau ~ as.factor(`rep7a_6_SAP108C001(SAP108C)`), 
##     data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4725 -0.2975 -0.0475  0.0850  1.0075 
## 
## Coefficients:
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                 3.0825     0.1054  29.252 2.55e-15
## as.factor(`rep7a_6_SAP108C001(SAP108C)`)1  -0.2525     0.3161  -0.799    0.436
##                                              
## (Intercept)                               ***
## as.factor(`rep7a_6_SAP108C001(SAP108C)`)1    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4215 on 16 degrees of freedom
## Multiple R-squared:  0.03834,    Adjusted R-squared:  -0.02176 
## F-statistic: 0.638 on 1 and 16 DF,  p-value: 0.4361

Confint(model_11_plasmids_sau)

##                                           Estimate      2.5 %    97.5 %
## (Intercept)                                 3.0825  2.8591109 3.3058891
## as.factor(`rep7a_6_SAP108C001(SAP108C)`)1  -0.2525 -0.9226673 0.4176673

model_11_plasmids_sub <- lm(mic_sub ~as.factor(`rep7a_6_SAP108C001(SAP108C)`),data=mb_xylosus_plasmids)
summary(model_11_plasmids_sub)

## 
## Call:
## lm(formula = mic_sub ~ as.factor(`rep7a_6_SAP108C001(SAP108C)`), 
##     data = mb_xylosus_plasmids)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3100 -0.3875 -0.0500  0.4275  1.0700 
## 
## Coefficients:
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                 3.0200     0.1626  18.577 2.98e-12
## as.factor(`rep7a_6_SAP108C001(SAP108C)`)1   0.3100     0.4877   0.636    0.534
##                                              
## (Intercept)                               ***
## as.factor(`rep7a_6_SAP108C001(SAP108C)`)1    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6502 on 16 degrees of freedom
## Multiple R-squared:  0.02463,    Adjusted R-squared:  -0.03633 
## F-statistic: 0.4041 on 1 and 16 DF,  p-value: 0.534

Confint(model_11_plasmids_sub)

##                                           Estimate      2.5 %   97.5 %
## (Intercept)                                   3.02  2.6753829 3.364617
## as.factor(`rep7a_6_SAP108C001(SAP108C)`)1     0.31 -0.7238512 1.343851

4.2 Visualizations

# ---- amr genes ----


# Staphylococcus aureus

amr_plot_1 <- amr_sau %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(amr_sau$wgs_isolate_id,amr_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


# Staphylococcus chromogenes

amr_plot_2 <- amr_sch %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(amr_sch$wgs_isolate_id,amr_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

# Staphylococcus haemolyticus

amr_plot_3 <- amr_shaem %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(amr_shaem$wgs_isolate_id,amr_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

# Staphylococcus sciuri

amr_plot_4 <- amr_ssc %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(amr_ssc$wgs_isolate_id,amr_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Mammaliicoccus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

#Staphylococcus succinus

amr_plot_5 <- amr_ssuc %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(amr_ssuc$wgs_isolate_id,amr_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


# Staphylococcus xylosus/saprophytiucs

amr_plot_6 <- amr_sxyl_spxyl %>%
  ggplot(aes(x = gene_abbreviation, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

# Others

amr_plot_7 <- amr_other %>%
  ggplot(aes(x = gene_abbreviation, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "ARGs",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x = element_text(angle = 45, hjust = 1))


# Multipanel figure with different species

amr_plot <- ggarrange(amr_plot_1 + rremove("xlab")+ rremove("ylab"),amr_plot_2 + rremove("xlab")+ rremove("ylab"),amr_plot_3 + rremove("xlab")+ rremove("ylab"),amr_plot_4 + rremove("xlab")+ rremove("ylab"),amr_plot_5 + rremove("xlab")+ rremove("ylab"),amr_plot_6 + rremove("xlab")+ rremove("ylab"),amr_plot_7 + rremove("xlab")+ rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2.25))

annotate_figure(amr_plot,
                bottom = text_grob("Gene", color = "black",
                                   hjust = 0.75, x = 0.5),
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/amr.png",width = 20, height = 40, units = "cm")





# ---- plasmid genes ----


# Staphylococcus aureus

plasmid_plot_1 <- plasmid_sau %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(plasmid_sau$wgs_isolate_id,plasmid_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


# Staphylococcus chromogenes

plasmid_plot_2 <- plasmid_sch %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(plasmid_sch$wgs_isolate_id,plasmid_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

# Staphylococcus haemolyticus

plasmid_plot_3 <- plasmid_shaem %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(plasmid_shaem$wgs_isolate_id,plasmid_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

# Staphylococcus sciuri

plasmid_plot_4 <- plasmid_ssc %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(plasmid_ssc$wgs_isolate_id,plasmid_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Mammaliicoccus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

#Staphylococcus succinus

plasmid_plot_5 <- plasmid_ssuc %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(plasmid_ssuc$wgs_isolate_id,plasmid_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


# Staphylococcus xylosus/saprophytiucs

plasmid_plot_6 <- plasmid_sxyl_spxyl %>%
  ggplot(aes(x = gene_abbreviation, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

# Others

plasmid_plot_7 <- plasmid_other %>%
  ggplot(aes(x = gene_abbreviation, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Plasmids",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x = element_text(angle = 45, hjust = 1))


# Multipanel figure with different species

plasmid_plot <- ggarrange(plasmid_plot_1 + rremove("xlab")+ rremove("ylab"),plasmid_plot_2 + rremove("xlab")+ rremove("ylab"),plasmid_plot_3 + rremove("xlab")+ rremove("ylab"),plasmid_plot_4 + rremove("xlab")+ rremove("ylab"),plasmid_plot_5 + rremove("xlab")+ rremove("ylab"),plasmid_plot_6 + rremove("xlab")+ rremove("ylab"),plasmid_plot_7 + rremove("xlab")+ rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2.25))

annotate_figure(plasmid_plot,
                bottom = text_grob("Gene", color = "black",
                                   hjust = 0.75, x = 0.5),
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/plasmid.png",width = 20, height = 40, units = "cm")


amr_plasmid_plot <- ggarrange(
  amr_plot_1 + rremove("xlab") + rremove("ylab"),
  plasmid_plot_1 + rremove("xlab") + rremove("ylab") ,
  amr_plot_2 + rremove("xlab") + rremove("ylab"),
  plasmid_plot_2 + rremove("xlab") + rremove("ylab") ,
  amr_plot_3 + rremove("xlab") + rremove("ylab"),
  plasmid_plot_3 + rremove("xlab") + rremove("ylab") ,
  amr_plot_4 + rremove("xlab") + rremove("ylab"),
  plasmid_plot_4 + rremove("xlab") + rremove("ylab") ,
  amr_plot_5 + rremove("xlab") + rremove("ylab"),
  plasmid_plot_5 + rremove("xlab") + rremove("ylab") ,
  amr_plot_6 + rremove("xlab") + rremove("ylab"),
  plasmid_plot_6 + rremove("xlab") + rremove("ylab") ,
  amr_plot_7 + rremove("ylab"),
  plasmid_plot_7 + rremove("ylab") ,
  ncol = 2, nrow = 7, common.legend = TRUE, align = "v",
  labels = c("A1", "A2", "B1", "B2", "C1", "C2", "D1", "D2", "E1", "E2", "F1", "F2", "G1", "G2"),
  heights = c(1.25,4.5, 4.25, 1.75,3,3.5, 2.5)
)

annotate_figure(amr_plasmid_plot, left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/figure_11.pdf",width = 40, height = 40, units = "cm",dpi=600)
ggsave(plot = last_plot(),"./figures/figure_11.png",width = 40, height = 40, units = "cm",dpi=600)
ggsave(plot = last_plot(),"./figures/figure_11.jpeg",width = 40, height = 40, units = "cm",dpi=600)

Exploring the phylogenetic diversity and antimicrobial activity of non-aureus staphylococci and mammaliicocci isolated from teat apices of organic dairy cows. Part 2 - WGS analysis

Felipe Pena Mosca

2025-07-28

1 Assembly process statistics

1.1 All contigs

1.2 Contigs >500 bp (used for analysis)

2 AMPs genes

2.1 SCH

2.2 SHAEM

2.3 SSC

2.4 SSUC

2.5 SXYL & SPXYL

3 Virulence genes

3.1 Summary

3.2 Visualizations

3.2.1 Adherence

3.2.2 Exoenzymes

3.2.3 Immune evasion

3.2.5 Enterotoxins and exotoxins

3.2.6 Other toxins

4 AMR genes and plasmids

4.1 Summary

4.2 Visualizations