[
  {
    "title": "Accurate identification of polyadenylation sites from 3' end deep \nsequencing using a naive Bayes classifier",
    "author": [
      {
        "given": "Sarah",
        "family": "Sheppard",
        "role": {},
        "email": {},
        "comment": {}
      },
      {
        "given": "Nathan",
        "family": "Lawson",
        "role": {},
        "email": {},
        "comment": {}
      },
      {
        "given": "Lihua",
        "family": "Zhu",
        "role": {},
        "email": {},
        "comment": {}
      }
    ],
    "journal": "Bioinformatics",
    "volume": "29",
    "year": "2013",
    "number": "20",
    "pages": "2564",
    "url": "http://bioinformatics.oxfordjournals.org/content/29/20/2564.long",
    "doi": "10.1093/bioinformatics/btt446",
    "pubmedid": "23962617",
    "issn": "1460-2059",
    "abstract": "MOTIVATION: 3' end processing is important for transcription \ntermination, mRNA stability and regulation of gene expression. To identify 3' \nends, most techniques use an oligo-dT primer to construct deep sequencing \nlibraries. However, this approach can lead to identification of artifactual \npolyadenylation sites due to internal priming in homopolymeric stretches of \nadenines. Although heuristic filters have been applied in these cases, they \ntypically result in a high proportion of both false-positive and -negative \nclassifications. Therefore, there is a need to develop improved algorithms to \nbetter identify mis-priming events in oligo-dT primed sequences. \nRESULTS: By analyzing sequence features flanking 3' ends derived from oligo-dT-based sequencing, we developed a naive Bayes classifier to classify them as true or \nfalse/internally primed. The resulting algorithm is highly accurate, outperforms \nprevious heuristic filters and facilitates identification of novel \npolyadenylation sites."
  }
]
