diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ad51b3a --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +link2epub +*.mobi +*.epub diff --git a/README.md b/README.md new file mode 100644 index 0000000..da04b5e --- /dev/null +++ b/README.md @@ -0,0 +1,20 @@ +# link2epub [![Go Report Card](https://goreportcard.com/badge/github.com/arnaucube/link2epub)](https://goreportcard.com/report/github.com/arnaucube/link2epub) +Very simple tool to download articles and convert them to `.epub`/`.mobi` files. + +## Download +- Binary can be: + - downloaded from the [releases section](https://github.com/arnaucube/link2epub/releases) + - compiled with `go build` + +## Usage +Needs [calibre](https://calibre-ebook.com/) in order to convert to `.epub` and `.mobi`. + +```bash +./link2epub -l https://link.com/to-the-article + +# optionally add extension (by default .mobi) +./link2epub -l https://link.com/to-the-article -type mobi +./link2epub -l https://link.com/to-the-article -type epub +``` + +Thanks to [@dhole](https://github.com/dhole) for the advice. 
diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..25fae10 --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module link2epub + +go 1.12 + +require github.com/go-shiori/go-readability v0.0.0-20191021230327-9a7f6996b6cc diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..1979e6d --- /dev/null +++ b/go.sum @@ -0,0 +1,50 @@ +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= +github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= +github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= +github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= +github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/go-shiori/dom v0.0.0-20190930082056-9d974a4f8b25 h1:1ZfeL7TG+z4cjtC6XT+drfe23JxaVMwdqyGBh4O4foo= +github.com/go-shiori/dom v0.0.0-20190930082056-9d974a4f8b25/go.mod h1:360KoNl36ftFYhjLHuEty78kWUGw8i1opEicvIDLfRk= +github.com/go-shiori/go-readability v0.0.0-20191021230327-9a7f6996b6cc h1:pxiPFDXo0L61rOb0en++O9wGT3pV+elPPbbEUhy+r/4= +github.com/go-shiori/go-readability v0.0.0-20191021230327-9a7f6996b6cc/go.mod h1:Olbo3XhHmEScSA/zTFEUf2+mq8gTmrlCd0RtgfVpisM= +github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= 
+github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= +github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= +github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= +github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/cobra v0.0.4/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= +github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= +github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= +github.com/xordataexchange/crypt 
v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/net v0.0.0-20190926025831-c00fd9afed17 h1:qPnAdmjNA41t3QBTx2mFGf/SD1IoslhYu7AmdsVzCcs= +golang.org/x/net v0.0.0-20190926025831-c00fd9afed17/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190927073244-c990c680b611 h1:q9u40nxWT5zRClI/uU9dHCiYGottAg6Nzz4YUQyHxdA= +golang.org/x/sys v0.0.0-20190927073244-c990c680b611/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/main.go b/main.go new file mode 100644 index 0000000..63096ff --- /dev/null +++ b/main.go @@ -0,0 +1,129 @@ +package main + +import ( + "flag" + "fmt" + "io/ioutil" + "log" + "net/http" + "os" + "os/exec" + "regexp" + "strconv" + "strings" + + readability "github.com/go-shiori/go-readability" +) + +const tmpDir = "tmp" + +func main() { + // var typeFlag string + linkFlag := flag.String("l", "", "Link to download") + typeFlag := flag.String("type", "mobi", "Type of epub. 
Available: mobi (default), epub") + + flag.Parse() + + if *typeFlag != "mobi" && *typeFlag != "epub" { + log.Fatal("not valid type") + } + err := os.Mkdir(tmpDir, os.ModePerm) + if err != nil { + log.Fatalf("error creating tmp dir %s: %v\n", tmpDir, err) + } + + // get link + fmt.Println("\n> getting the link") + resp, err := http.Get(*linkFlag) + if err != nil { + log.Fatalf("failed to download %s: %v\n", *linkFlag, err) + } + defer resp.Body.Close() + + // convert the html to simple html with go-readability + article, err := readability.FromReader(resp.Body, *linkFlag) + if err != nil { + log.Fatalf("failed to parse %s: %v\n", *linkFlag, err) + } + + fmt.Printf(" URL : %s\n", *linkFlag) + fmt.Printf(" Title : %s\n", article.Title) + fmt.Printf(" Author : %s\n", article.Byline) + fmt.Printf(" Length : %d\n", article.Length) + fmt.Printf(" Excerpt : %s\n", article.Excerpt) + fmt.Printf(" SiteName: %s\n", article.SiteName) + fmt.Printf(" Image : %s\n", article.Image) + fmt.Printf(" Favicon : %s\n", article.Favicon) + + // get images + fmt.Println("\n>getting the images") + imgRegex := regexp.MustCompile(`(]*)(src=")([^"]*)"`) + imgs := imgRegex.FindAllSubmatch([]byte(article.Content), -1) + for i, img := range imgs { + fmt.Println(" img", i, string(img[4])) + filename, err := downloadImg(string(img[4]), strconv.Itoa(i)) + if err != nil { + log.Fatalf("error in downloadImg %s: %v\n", img[4], err) + } + + // replace in the article.Content the current img by new filename + article.Content = strings.Replace(article.Content, string(img[4]), filename, -1) + } + + // store html file + filename := article.Title + " - " + article.Byline + out, err := os.Create(tmpDir + "/" + filename + ".html") + if err != nil { + log.Fatalf("failed creating index.xhtml: %v\n", err) + } + defer out.Close() + + _, err = out.Write([]byte(article.Content)) + if err != nil { + log.Fatalf("failed writting index.html: %v\n", err) + } + out.Sync() + + // call calibre to convert the html to 
epub/mobi + fmt.Println("\n>converting to", *typeFlag) + cmd := exec.Command("ebook-convert", tmpDir+"/"+filename+".html", filename+"."+*typeFlag) + if err := cmd.Run(); err != nil { + log.Fatalf("failed converting the html to %s: %v\n", *typeFlag, err) + } + + // delete tmp dir + cmd = exec.Command("rm", "-rf", tmpDir) + if err := cmd.Run(); err != nil { + log.Fatalf("failed removing the tmp dir %s: %v\n", tmpDir, err) + } +} + +func downloadImg(url string, path string) (string, error) { + url = strings.Replace(url, "/max/60/", "/max/1000/", -1) // for "medium.com" api + resp, err := http.Get(url) + if err != nil { + return "", err + } + defer resp.Body.Close() + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return "", err + } + + contentType := http.DetectContentType(body) + filename := path + "." + strings.Replace(contentType, "image/", "", -1) + + out, err := os.Create(tmpDir + "/" + filename) + if err != nil { + return "", err + } + defer out.Close() + + _, err = out.Write(body) + if err != nil { + return "", err + } + out.Sync() + + return filename, nil +}