Browse Source

initial commit

master v0.0.1
arnaucube 5 years ago
parent
commit
ad7cae0e1e
5 changed files with 207 additions and 0 deletions
  1. +3
    -0
      .gitignore
  2. +20
    -0
      README.md
  3. +5
    -0
      go.mod
  4. +50
    -0
      go.sum
  5. +129
    -0
      main.go

+ 3
- 0
.gitignore

@ -0,0 +1,3 @@
link2epub
*.mobi
*.epub

+ 20
- 0
README.md

@ -0,0 +1,20 @@
# link2epub [![Go Report Card](https://goreportcard.com/badge/github.com/arnaucube/link2epub)](https://goreportcard.com/report/github.com/arnaucube/link2epub)
Very simple tool to download articles and convert them to `.epub`/`.mobi` files.
## Download
- Binary can be:
  - downloaded from [releases section](https://github.com/arnaucube/link2epub/releases)
  - compiled with `go build`
## Usage
Needs [calibre](https://calibre-ebook.com/) in order to convert to `.epub` and `.mobi`.
```bash
./link2epub -l https://link.com/to-the-article
# optionally choose the output type (default: mobi)
./link2epub -l https://link.com/to-the-article -type mobi
./link2epub -l https://link.com/to-the-article -type epub
```
Thanks to [@dhole](https://github.com/dhole) for the advice.

+ 5
- 0
go.mod

@ -0,0 +1,5 @@
module link2epub
go 1.12
require github.com/go-shiori/go-readability v0.0.0-20191021230327-9a7f6996b6cc

+ 50
- 0
go.sum

@ -0,0 +1,50 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk=
github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/go-shiori/dom v0.0.0-20190930082056-9d974a4f8b25 h1:1ZfeL7TG+z4cjtC6XT+drfe23JxaVMwdqyGBh4O4foo=
github.com/go-shiori/dom v0.0.0-20190930082056-9d974a4f8b25/go.mod h1:360KoNl36ftFYhjLHuEty78kWUGw8i1opEicvIDLfRk=
github.com/go-shiori/go-readability v0.0.0-20191021230327-9a7f6996b6cc h1:pxiPFDXo0L61rOb0en++O9wGT3pV+elPPbbEUhy+r/4=
github.com/go-shiori/go-readability v0.0.0-20191021230327-9a7f6996b6cc/go.mod h1:Olbo3XhHmEScSA/zTFEUf2+mq8gTmrlCd0RtgfVpisM=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
github.com/spf13/cobra v0.0.4/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU=
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17 h1:qPnAdmjNA41t3QBTx2mFGf/SD1IoslhYu7AmdsVzCcs=
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190927073244-c990c680b611 h1:q9u40nxWT5zRClI/uU9dHCiYGottAg6Nzz4YUQyHxdA=
golang.org/x/sys v0.0.0-20190927073244-c990c680b611/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

+ 129
- 0
main.go

@ -0,0 +1,129 @@
package main
import (
"flag"
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"os/exec"
"regexp"
"strconv"
"strings"
readability "github.com/go-shiori/go-readability"
)
// tmpDir is the scratch directory, relative to the current working directory,
// in which the downloaded images and the intermediate HTML file are stored
// while the e-book is being built.
const tmpDir = "tmp"
// main parses the command-line flags, downloads the article given with -l and
// converts it into the e-book format selected with -type ("mobi" or "epub").
//
// The scratch directory tmpDir is created here and removed again before the
// program exits, whether or not the conversion succeeded, so a failed run does
// not block the next one.
func main() {
	linkFlag := flag.String("l", "", "Link to download")
	typeFlag := flag.String("type", "mobi", "Type of epub. Available: mobi (default), epub")
	flag.Parse()

	if *linkFlag == "" {
		flag.Usage()
		log.Fatal("missing link, use -l <url>")
	}
	if *typeFlag != "mobi" && *typeFlag != "epub" {
		log.Fatal("not valid type")
	}

	// os.Mkdir (not MkdirAll) on purpose: if a directory with this name
	// already exists it is not ours, and it must not be wiped by the cleanup
	// below.
	if err := os.Mkdir(tmpDir, os.ModePerm); err != nil {
		log.Fatalf("error creating tmp dir %s: %v\n", tmpDir, err)
	}

	// log.Fatal skips deferred calls, so the cleanup is done explicitly
	// before any fatal exit.
	err := convertLink(*linkFlag, *typeFlag)
	rmErr := os.RemoveAll(tmpDir)
	if rmErr != nil {
		log.Printf("failed removing the tmp dir %s: %v\n", tmpDir, rmErr)
	}
	if err != nil {
		log.Fatal(err)
	}
	if rmErr != nil {
		os.Exit(1)
	}
}

// convertLink downloads the article at link, stores a simplified HTML version
// of it (with its images) inside tmpDir and calls calibre's ebook-convert to
// produce "<title> - <byline>.<outType>" in the current working directory.
//
// tmpDir must already exist; the caller is responsible for removing it.
func convertLink(link, outType string) error {
	// get link
	fmt.Println("\n> getting the link")
	resp, err := http.Get(link)
	if err != nil {
		return fmt.Errorf("failed to download %s: %v", link, err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		// Without this check an error page would be converted as the article.
		return fmt.Errorf("failed to download %s: unexpected status %s", link, resp.Status)
	}

	// convert the html to simple html with go-readability
	article, err := readability.FromReader(resp.Body, link)
	if err != nil {
		return fmt.Errorf("failed to parse %s: %v", link, err)
	}
	fmt.Printf(" URL : %s\n", link)
	fmt.Printf(" Title : %s\n", article.Title)
	fmt.Printf(" Author : %s\n", article.Byline)
	fmt.Printf(" Length : %d\n", article.Length)
	fmt.Printf(" Excerpt : %s\n", article.Excerpt)
	fmt.Printf(" SiteName: %s\n", article.SiteName)
	fmt.Printf(" Image : %s\n", article.Image)
	fmt.Printf(" Favicon : %s\n", article.Favicon)

	// get images
	fmt.Println("\n>getting the images")
	imgRegex := regexp.MustCompile(`(<img )([^>]*)(src=")([^"]*)"`)
	done := make(map[string]bool)
	for i, img := range imgRegex.FindAllStringSubmatch(article.Content, -1) {
		src := img[4]
		// An empty src has nothing to download, and a repeated src was
		// already replaced everywhere by an earlier iteration.
		if src == "" || done[src] {
			continue
		}
		done[src] = true

		fmt.Println(" img", i, src)
		imgFile, err := downloadImg(src, strconv.Itoa(i))
		if err != nil {
			return fmt.Errorf("error in downloadImg %s: %v", src, err)
		}
		// replace in the article.Content the current img by new filename
		article.Content = strings.Replace(article.Content, src, imgFile, -1)
	}

	// store html file; WriteFile closes it, so calibre reads a complete file
	filename := outputName(article.Title, article.Byline)
	htmlPath := tmpDir + "/" + filename + ".html"
	if err := ioutil.WriteFile(htmlPath, []byte(article.Content), 0666); err != nil {
		return fmt.Errorf("failed writing %s: %v", htmlPath, err)
	}

	// call calibre to convert the html to epub/mobi
	fmt.Println("\n>converting to", outType)
	cmd := exec.Command("ebook-convert", htmlPath, filename+"."+outType)
	if output, err := cmd.CombinedOutput(); err != nil {
		// Include calibre's own output: the exit status alone says nothing.
		return fmt.Errorf("failed converting the html to %s: %v\n%s", outType, err, output)
	}
	return nil
}

// outputName builds the base file name "<title> - <byline>" used for both the
// intermediate HTML file and the resulting e-book.
//
// Path separators and control characters are replaced by '_' so that the name
// cannot point into another directory, an empty part is left out, and
// "article" is returned when both parts are empty.
func outputName(title, byline string) string {
	name := strings.TrimSpace(title)
	if by := strings.TrimSpace(byline); by != "" {
		if name == "" {
			name = by
		} else {
			name += " - " + by
		}
	}
	name = strings.Map(func(r rune) rune {
		if r == '/' || r == '\\' || r < 0x20 {
			return '_'
		}
		return r
	}, name)
	if name == "" {
		return "article"
	}
	return name
}
func downloadImg(url string, path string) (string, error) {
url = strings.Replace(url, "/max/60/", "/max/1000/", -1) // for "medium.com" api
resp, err := http.Get(url)
if err != nil {
return "", err
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return "", err
}
contentType := http.DetectContentType(body)
filename := path + "." + strings.Replace(contentType, "image/", "", -1)
out, err := os.Create(tmpDir + "/" + filename)
if err != nil {
return "", err
}
defer out.Close()
_, err = out.Write(body)
if err != nil {
return "", err
}
out.Sync()
return filename, nil
}

Loading…
Cancel
Save