download

package
v1.0.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 16, 2026 License: MIT Imports: 11 Imported by: 0

Documentation

Index

Constants

View Source
// Output names exported by the package. JSONFile carries a ".jsonl"
// extension; both directory values include a trailing slash as part of the
// constant itself.
// NOTE(review): presumably these are the destinations used when the
// saveMetadata, savePDFs and saveSummaries flags of DownloadArxivPapers are
// set — confirm against the implementation.
const (
	JSONFile      = "metadata.jsonl"
	PDFDirectory  = "pdfs/"
	TextDirectory = "texts/"
)

Variables

This section is empty.

Functions

func DownloadArxivPapers

func DownloadArxivPapers(ctx context.Context, searchQuery string, numResults int, saveMetadata, savePDFs, saveSummaries bool) error

Types

type ArxivPaper

// ArxivPaper is the JSON-facing record for a single paper.
//
// Every field except Summary is serialized under the snake_case key given in
// its tag. Summary is tagged `json:"-"`, so encoding/json neither writes nor
// reads it.
// NOTE(review): Summary is presumably what WriteSummary emits — confirm
// against the method body.
type ArxivPaper struct {
	ID              string   `json:"id"`
	Updated         string   `json:"updated"`
	Published       string   `json:"published"`
	Title           string   `json:"title"`
	Summary         string   `json:"-"` // skip in JSON like Rust
	Authors         []string `json:"authors"`
	PrimaryCategory string   `json:"primary_category"`
	Categories      []string `json:"categories"`
	PDFURL          string   `json:"pdf_url"`
	HTMLURL         string   `json:"html_url"`
	// Comment is a pointer with omitempty: a nil value drops the "comment"
	// key from the JSON output entirely.
	Comment *string `json:"comment,omitempty"`
}

func (*ArxivPaper) FetchPDF

func (p *ArxivPaper) FetchPDF(ctx context.Context, outPath string) error

func (*ArxivPaper) WriteSummary

func (p *ArxivPaper) WriteSummary(outPath string) error

type Author

// Author is the XML mapping for one author element; Name is read from its
// <name> child element.
type Author struct {
	Name string `xml:"name"`
}

type Category

// Category is the XML mapping for one category element; Term is read from
// the element's "term" attribute rather than from its content.
type Category struct {
	Term string `xml:"term,attr"`
}

type Comment

// Comment is the XML mapping for the comment element in the
// http://arxiv.org/schemas/atom namespace.
type Comment struct {
	// XMLName pins the expected element name and namespace.
	XMLName xml.Name `xml:"http://arxiv.org/schemas/atom comment"`
	// Value receives the element's character data.
	Value string `xml:",chardata"`
}

type Entry

// Entry is the XML mapping for a single <entry> element.
//
// The scalar fields are read from the like-named child elements. Authors,
// Links and Categories collect every repeated <author>, <link> and
// <category> child. Comment is matched by namespace as well as name
// (http://arxiv.org/schemas/atom), unlike the other fields, whose tags name
// no namespace.
type Entry struct {
	XMLName    xml.Name   `xml:"entry"`
	ID         string     `xml:"id"`
	Updated    string     `xml:"updated"`
	Published  string     `xml:"published"`
	Title      string     `xml:"title"`
	Summary    string     `xml:"summary"`
	Authors    []Author   `xml:"author"`
	Links      []Link     `xml:"link"`
	Categories []Category `xml:"category"`
	Comment    Comment    `xml:"http://arxiv.org/schemas/atom comment"`
}

type Feed

// Feed is the XML mapping for the root <feed> element; Entries collects each
// <entry> child.
type Feed struct {
	XMLName xml.Name `xml:"feed"`
	Entries []Entry  `xml:"entry"`
}

Atom XML structures for parsing the arXiv API response.

// Link is the XML mapping for one <link> element. All four fields are read
// from attributes of the element (type, href, rel, title), not from child
// elements.
// NOTE(review): presumably Type/Title/Rel are what distinguish the PDF link
// from the HTML link when filling ArxivPaper.PDFURL and HTMLURL — confirm
// against the conversion code.
type Link struct {
	Type  string `xml:"type,attr"`
	HRef  string `xml:"href,attr"`
	Rel   string `xml:"rel,attr"`
	Title string `xml:"title,attr"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL