@prefix this: . @prefix sub: . @prefix np: . @prefix dct: . @prefix nt: . @prefix npx: . @prefix xsd: . @prefix rdfs: . @prefix orcid: . @prefix prov: . @prefix foaf: . sub:Head { this: a np:Nanopublication; np:hasAssertion sub:assertion; np:hasProvenance sub:provenance; np:hasPublicationInfo sub:pubinfo . } sub:assertion { a , ; dct:creator orcid:0000-0001-9487-5622; dct:publisher ; dct:subject ; rdfs:comment "Ant Colony Optimization (ACO) has served as a widely-utilized metaheuristic algorithm for decades for solving combinatorial optimization problems. Since its initial construction, ACO has seen a wide variety of modifications and connections to Reinforcement Learning (RL). Substantial parallels can be seen as early as 1995 with Ant-Q's relationship with Q-learning, through 2022 with ADACO's connection with Policy Gradient. In this work, we describe ACO, more specifically the Stochastic Gradient Descent ACO algorithm (ACOSGD), explicitly as an off-policy Policy Gradient (PG) method. We also incorporate experience replay into several ACO algorithm variants, including AS, MaxMin-ACO, ACOSGD, ADACO, and our two policy gradient-based versions: PGACO and PPOACO, drawing the connection to elitist ACO strategies. We show that our implementation of PG in ACO with experience replay and a baselined reward update strategy applied to eight TSP problems of varying sizes performs competitively with both fundamental ACO and SGD-based ACO versions. We also show that the replay buffer seems to unilaterally improve the performance of ACO algorithms through an ablation study"; rdfs:label "Ant Colony Optimization with Policy Gradients and Replay"; this:; "john.sheppard@montana.edu"; "July 13 2025"; "2024" . } sub:provenance { sub:assertion prov:wasAttributedTo orcid:0009-0008-8411-2742 . } sub:pubinfo { orcid:0009-0008-8411-2742 foaf:name "Emily Regalado" . this: dct:created "2026-04-30T21:39:47.426Z"^^xsd:dateTime; dct:creator orcid:0009-0008-8411-2742; dct:license ; npx:introduces ; npx:wasCreatedAt ; nt:wasCreatedFromProvenanceTemplate ; nt:wasCreatedFromPubinfoTemplate , ; nt:wasCreatedFromTemplate . sub:sig npx:hasAlgorithm "RSA"; npx:hasPublicKey "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAxzr6UBGMW6c8tegz0babaledWUEQ0PLDE4tp7Iinbe2DZtAtY5JUptKYuStWDZx+QER4808P8dejNWRnBDzgthYJm/AyNSXflHSJhz2+NC+h7RylOLxbwLEQocmyKKiYxa2gT85m6ajVL2M6TnfG67nnK+K2f7iCGL6wYXRITD1q+7+5SWqBdDXIV921W4IKWaD2GJk+NRBoOqQhbsrk8Tn5XsNd7DMYVHk47oMDGbeBnrOIoRPsbBgAcoCsxxhiB9yN6Lf8EUbnlXVEDzJuZk048L1BDZL+6nkA8btTQGP2ijUFWA7rTrod3LjUDQWLZS95njjl867dtmv/znYkzwIDAQAB"; npx:hasSignature "QK0Uq0dM8EDClZWwK1iypzM5Jofx7eS22L4Yyk8y1QSVx7lJke+W4p4J+YgX6SyQ5ArHEcpoJHzdiV/fM2BzLoBO5d4TqI2fXMpyAdEa3MCZBkv2VnG7G27xSBbEEuYQQfKCdCuLpxFTUfq7u6U9225ODch4R53l2xXGGJPhzvwuwAFxphAzJcrDZo8NzhyHbYq3Mp7Y0FZUbbAF6GBwK/qxrRVuUNuhVE6+EMSo9o3cATE/pb5B5YMkOSY2GYfsThybCKX0FETh5T5L8pp4AY3kA8aCW42ZpH0511DkuMpDNvyDArvBmj85jLc7wJaJPV8n2NtpbChXFOrjlMWIug=="; npx:hasSignatureTarget this:; npx:signedBy orcid:0009-0008-8411-2742 . }