@prefix this: <https://w3id.org/np/RAbv_E_U02qVYAHDisjKEUhi7qQYFsjhGqL24QEbWRP78> .
@prefix sub: <https://w3id.org/np/RAbv_E_U02qVYAHDisjKEUhi7qQYFsjhGqL24QEbWRP78/> .
@prefix np: <http://www.nanopub.org/nschema#> .
@prefix dct: <http://purl.org/dc/terms/> .
@prefix nt: <https://w3id.org/np/o/ntemplate/> .
@prefix npx: <http://purl.org/nanopub/x/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix orcid: <https://orcid.org/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .

# Head graph: declares `this:` as a nanopublication and points to the three
# named graphs (assertion, provenance, publication info) that make it up.
sub:Head {
  this: a np:Nanopublication;
    np:hasAssertion sub:assertion;
    np:hasProvenance sub:provenance;
    np:hasPublicationInfo sub:pubinfo .
}

# Assertion graph: describes the resource identified by the DOI below. It is
# typed as both an article and a FAIR Digital Object, and carries a creator,
# publisher, subject, abstract (rdfs:comment), title (rdfs:label), a link back
# to this nanopublication as its metadata record, a contact address and a
# start/end date.
#
# NOTE(review): the three DCAT predicates are written with the namespace
#   https://www.w3.org/ns/dcat# ; the W3C DCAT vocabulary is published under
#   http://www.w3.org/ns/dcat# . As written these IRIs will not match terms
#   from the standard vocabulary -- confirm whether the originating template
#   intends the https form.
# NOTE(review): startDate / endDate are plain string literals with no xsd
#   datatype, and the two values use different formats ("2024" vs
#   "July 13 2025"), so consumers cannot compare them as dates without custom
#   parsing. Typed literals (e.g. xsd:gYear / xsd:date) would be preferable.
# NOTE(review): contactPoint is a bare e-mail string rather than a resource;
#   DCAT appears to expect a vCard-style resource here -- TODO confirm.
# NOTE(review): the abstract text in rdfs:comment ends without a final period.
# NOTE(review): presumably none of the above can be corrected by editing this
#   file in place, because the content is covered by the signature in the
#   publication-info graph; fixes likely need a newly published nanopublication
#   -- verify against the nanopublication tooling.
sub:assertion {
  <https://doi.org/10.1145/3712256.3726452> a <https://w3id.org/fair/ff/terms/article>,
      <https://w3id.org/fdof/ontology#FAIRDigitalObject>;
    dct:creator orcid:0000-0001-9487-5622;
    dct:publisher <https://ror.org/021nxhr62>;
    dct:subject <http://edamontology.org/topic_3316>;
    rdfs:comment "Ant Colony Optimization (ACO) has served as a widely-utilized metaheuristic algorithm for decades for solving combinatorial optimization problems. Since its initial construction, ACO has seen a wide variety of modifications and connections to Reinforcement Learning (RL). Substantial parallels can be seen as early as 1995 with Ant-Q's relationship with Q-learning, through 2022 with ADACO's connection with Policy Gradient. In this work, we describe ACO, more specifically the Stochastic Gradient Descent ACO algorithm (ACOSGD), explicitly as an off-policy Policy Gradient (PG) method. We also incorporate experience replay into several ACO algorithm variants, including AS, MaxMin-ACO, ACOSGD, ADACO, and our two policy gradient-based versions: PGACO and PPOACO, drawing the connection to elitist ACO strategies. We show that our implementation of PG in ACO with experience replay and a baselined reward update strategy applied to eight TSP problems of varying sizes performs competitively with both fundamental ACO and SGD-based ACO versions. We also show that the replay buffer seems to unilaterally improve the performance of ACO algorithms through an ablation study";
    rdfs:label "Ant Colony Optimization with Policy Gradients and Replay";
    <https://w3id.org/fdof/ontology#hasMetadata> this:;
    <https://www.w3.org/ns/dcat#contactPoint> "john.sheppard@montana.edu";
    <https://www.w3.org/ns/dcat#endDate> "July 13 2025";
    <https://www.w3.org/ns/dcat#startDate> "2024" .
}

# Provenance graph: attributes the assertion graph as a whole to a single
# agent, identified by ORCID.
sub:provenance {
  sub:assertion prov:wasAttributedTo orcid:0009-0008-8411-2742 .
}

# Publication-info graph: metadata about the nanopublication itself (not about
# the article it describes). Contains three groups of statements:
#   1. a display name for the ORCID that created and signed the nanopublication;
#   2. creation timestamp, creator, license, the resource this nanopublication
#      introduces, the site it was created at, and the assertion / provenance /
#      pubinfo templates it was created from;
#   3. the signature element (algorithm, public key, signature value, target,
#      signer).
# NOTE(review): the signature targets `this:`, so presumably any change to the
#   triples in this file invalidates it -- verify before editing anything
#   other than comments.
sub:pubinfo {
  # Human-readable name of the creator/signer.
  orcid:0009-0008-8411-2742 foaf:name "Emily Regalado" .
  
  # Creation metadata and template lineage of this nanopublication.
  this: dct:created "2026-04-30T21:39:47.426Z"^^xsd:dateTime;
    dct:creator orcid:0009-0008-8411-2742;
    dct:license <https://creativecommons.org/licenses/by/4.0/>;
    npx:introduces <https://doi.org/10.1145/3712256.3726452>;
    npx:wasCreatedAt <https://nanodash.knowledgepixels.com/>;
    nt:wasCreatedFromProvenanceTemplate <https://w3id.org/np/RA7lSq6MuK_TIC6JMSHvLtee3lpLoZDOqLJCLXevnrPoU>;
    nt:wasCreatedFromPubinfoTemplate <https://w3id.org/np/RACJ58Gvyn91LqCKIO9zu1eijDQIeEff28iyDrJgjSJF8>,
      <https://w3id.org/np/RAukAcWHRDlkqxk7H2XNSegc1WnHI569INvNr-xdptDGI>;
    nt:wasCreatedFromTemplate <https://w3id.org/np/RArM5GTwgxg9qslGX-XiQ-KTTUwdoM0KB1YqmT4GqTizA> .
  
  # Signature element: RSA public key and signature value over `this:`,
  # signed by the same ORCID that is listed as creator above.
  sub:sig npx:hasAlgorithm "RSA";
    npx:hasPublicKey "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAxzr6UBGMW6c8tegz0babaledWUEQ0PLDE4tp7Iinbe2DZtAtY5JUptKYuStWDZx+QER4808P8dejNWRnBDzgthYJm/AyNSXflHSJhz2+NC+h7RylOLxbwLEQocmyKKiYxa2gT85m6ajVL2M6TnfG67nnK+K2f7iCGL6wYXRITD1q+7+5SWqBdDXIV921W4IKWaD2GJk+NRBoOqQhbsrk8Tn5XsNd7DMYVHk47oMDGbeBnrOIoRPsbBgAcoCsxxhiB9yN6Lf8EUbnlXVEDzJuZk048L1BDZL+6nkA8btTQGP2ijUFWA7rTrod3LjUDQWLZS95njjl867dtmv/znYkzwIDAQAB";
    npx:hasSignature "QK0Uq0dM8EDClZWwK1iypzM5Jofx7eS22L4Yyk8y1QSVx7lJke+W4p4J+YgX6SyQ5ArHEcpoJHzdiV/fM2BzLoBO5d4TqI2fXMpyAdEa3MCZBkv2VnG7G27xSBbEEuYQQfKCdCuLpxFTUfq7u6U9225ODch4R53l2xXGGJPhzvwuwAFxphAzJcrDZo8NzhyHbYq3Mp7Y0FZUbbAF6GBwK/qxrRVuUNuhVE6+EMSo9o3cATE/pb5B5YMkOSY2GYfsThybCKX0FETh5T5L8pp4AY3kA8aCW42ZpH0511DkuMpDNvyDArvBmj85jLc7wJaJPV8n2NtpbChXFOrjlMWIug==";
    npx:hasSignatureTarget this:;
    npx:signedBy orcid:0009-0008-8411-2742 .
}