docs-matrix-spec/scripts/speculator/main.go
Daniel Wagner-Hall 559747e77a speculator: Sent Content-Type: text/html header
Go is auto-detecting that this is XML (because for some reason we
generate XHTML), and serving it with a Content-Type header text/xml.

This causes the browser to render it as XHTML, which gives interesting
quirks like extra newlines.

This forces the browser to interpret it as HTML.

What we should probably do instead of stop generating XHTML and start
generating HTML. But in the mean time, this will fix the rendering
issues.
2015-11-05 19:18:28 +00:00

414 lines
10 KiB
Go

// speculator allows you to preview pull requests to the matrix.org specification.
// It serves the following HTTP endpoints:
// - / lists open pull requests
// - /spec/123 which renders the spec as html at pull request 123.
// - /diff/rst/123 which gives a diff of the spec's rst at pull request 123.
// - /diff/html/123 which gives a diff of the spec's HTML at pull request 123.
// It is currently woefully inefficient, and there is a lot of low hanging fruit for improvement.
package main
import (
"bytes"
"encoding/json"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"math/rand"
"net/http"
"net/url"
"os"
"os/exec"
"path"
"strconv"
"strings"
"syscall"
"time"
"github.com/hashicorp/golang-lru"
)
type PullRequest struct {
Number int
Base Commit
Head Commit
Title string
User User
HTMLURL string `json:"html_url"`
}
type Commit struct {
SHA string
Repo RequestRepo
}
type RequestRepo struct {
CloneURL string `json:"clone_url"`
}
type User struct {
Login string
HTMLURL string `json:"html_url"`
}
var (
port = flag.Int("port", 9000, "Port on which to listen for HTTP")
allowedMembers map[string]bool
specCache *lru.Cache // string -> []byte
)
func (u *User) IsTrusted() bool {
return allowedMembers[u.Login]
}
const (
pullsPrefix = "https://api.github.com/repos/matrix-org/matrix-doc/pulls"
matrixDocCloneURL = "https://github.com/matrix-org/matrix-doc.git"
)
func gitClone(url string, shared bool) (string, error) {
directory := path.Join("/tmp/matrix-doc", strconv.FormatInt(rand.Int63(), 10))
cmd := exec.Command("git", "clone", url, directory)
if shared {
cmd.Args = append(cmd.Args, "--shared")
}
err := cmd.Run()
if err != nil {
return "", fmt.Errorf("error cloning repo: %v", err)
}
return directory, nil
}
func gitCheckout(path, sha string) error {
return runGitCommand(path, []string{"checkout", sha})
}
func gitFetch(path string) error {
return runGitCommand(path, []string{"fetch"})
}
func runGitCommand(path string, args []string) error {
cmd := exec.Command("git", args...)
cmd.Dir = path
err := cmd.Run()
if err != nil {
return fmt.Errorf("error running %q: %v", strings.Join(cmd.Args, " "), err)
}
return nil
}
func lookupPullRequest(url url.URL, pathPrefix string) (*PullRequest, error) {
if !strings.HasPrefix(url.Path, pathPrefix+"/") {
return nil, fmt.Errorf("invalid path passed: %s expect %s/123", url.Path, pathPrefix)
}
prNumber := url.Path[len(pathPrefix)+1:]
if strings.Contains(prNumber, "/") {
return nil, fmt.Errorf("invalid path passed: %s expect %s/123", url.Path, pathPrefix)
}
resp, err := http.Get(fmt.Sprintf("%s/%s", pullsPrefix, prNumber))
defer resp.Body.Close()
if err != nil {
return nil, fmt.Errorf("error getting pulls: %v", err)
}
dec := json.NewDecoder(resp.Body)
var pr PullRequest
if err := dec.Decode(&pr); err != nil {
return nil, fmt.Errorf("error decoding pulls: %v", err)
}
return &pr, nil
}
func generate(dir string) error {
cmd := exec.Command("python", "gendoc.py", "--nodelete")
cmd.Dir = path.Join(dir, "scripts")
var b bytes.Buffer
cmd.Stderr = &b
err := cmd.Run()
if err != nil {
return fmt.Errorf("error generating spec: %v\nOutput from gendoc:\n%v", err, b.String())
}
return nil
}
func writeError(w http.ResponseWriter, code int, err error) {
w.WriteHeader(code)
io.WriteString(w, fmt.Sprintf("%v\n", err))
}
type server struct {
matrixDocCloneURL string
}
// generateAt generates spec from repo at sha.
// Returns the path where the generation was done.
func (s *server) generateAt(sha string) (dst string, err error) {
err = gitFetch(s.matrixDocCloneURL)
if err != nil {
return
}
dst, err = gitClone(s.matrixDocCloneURL, true)
if err != nil {
return
}
if err = gitCheckout(dst, sha); err != nil {
return
}
err = generate(dst)
return
}
func (s *server) getSHAOf(ref string) (string, error) {
cmd := exec.Command("git", "rev-list", ref, "-n1")
cmd.Dir = path.Join(s.matrixDocCloneURL)
var b bytes.Buffer
cmd.Stdout = &b
err := cmd.Run()
if err != nil {
return "", fmt.Errorf("error generating spec: %v\nOutput from gendoc:\n%v", err, b.String())
}
return strings.TrimSpace(b.String()), nil
}
func (s *server) serveSpec(w http.ResponseWriter, req *http.Request) {
var sha string
if strings.ToLower(req.URL.Path) == "/spec/head" {
originHead, err := s.getSHAOf("origin/master")
if err != nil {
writeError(w, 500, err)
return
}
sha = originHead
} else {
pr, err := lookupPullRequest(*req.URL, "/spec")
if err != nil {
writeError(w, 400, err)
return
}
// We're going to run whatever Python is specified in the pull request, which
// may do bad things, so only trust people we trust.
if err := checkAuth(pr); err != nil {
writeError(w, 403, err)
return
}
sha = pr.Head.SHA
}
if cached, ok := specCache.Get(sha); ok {
w.Write(cached.([]byte))
return
}
dst, err := s.generateAt(sha)
defer os.RemoveAll(dst)
if err != nil {
writeError(w, 500, err)
return
}
b, err := ioutil.ReadFile(path.Join(dst, "scripts/gen/specification.html"))
if err != nil {
writeError(w, 500, fmt.Errorf("Error reading spec: %v", err))
return
}
w.Write(b)
specCache.Add(sha, b)
}
func checkAuth(pr *PullRequest) error {
if !pr.User.IsTrusted() {
return fmt.Errorf("%q is not a trusted pull requester", pr.User.Login)
}
return nil
}
func (s *server) serveRSTDiff(w http.ResponseWriter, req *http.Request) {
pr, err := lookupPullRequest(*req.URL, "/diff/rst")
if err != nil {
writeError(w, 400, err)
return
}
// We're going to run whatever Python is specified in the pull request, which
// may do bad things, so only trust people we trust.
if err := checkAuth(pr); err != nil {
writeError(w, 403, err)
return
}
base, err := s.generateAt(pr.Base.SHA)
defer os.RemoveAll(base)
if err != nil {
writeError(w, 500, err)
return
}
head, err := s.generateAt(pr.Head.SHA)
defer os.RemoveAll(head)
if err != nil {
writeError(w, 500, err)
return
}
diffCmd := exec.Command("diff", "-u", path.Join(base, "scripts", "tmp", "full_spec.rst"), path.Join(head, "scripts", "tmp", "full_spec.rst"))
var diff bytes.Buffer
diffCmd.Stdout = &diff
if err := ignoreExitCodeOne(diffCmd.Run()); err != nil {
writeError(w, 500, fmt.Errorf("error running diff: %v", err))
return
}
w.Write(diff.Bytes())
}
func (s *server) serveHTMLDiff(w http.ResponseWriter, req *http.Request) {
pr, err := lookupPullRequest(*req.URL, "/diff/html")
if err != nil {
writeError(w, 400, err)
return
}
// We're going to run whatever Python is specified in the pull request, which
// may do bad things, so only trust people we trust.
if err := checkAuth(pr); err != nil {
writeError(w, 403, err)
return
}
base, err := s.generateAt(pr.Base.SHA)
defer os.RemoveAll(base)
if err != nil {
writeError(w, 500, err)
return
}
head, err := s.generateAt(pr.Head.SHA)
defer os.RemoveAll(head)
if err != nil {
writeError(w, 500, err)
return
}
htmlDiffer, err := findHTMLDiffer()
if err != nil {
writeError(w, 500, fmt.Errorf("could not find HTML differ"))
return
}
cmd := exec.Command(htmlDiffer, path.Join(base, "scripts", "gen", "specification.html"), path.Join(head, "scripts", "gen", "specification.html"))
var b bytes.Buffer
cmd.Stdout = &b
if err := cmd.Run(); err != nil {
writeError(w, 500, fmt.Errorf("error running HTML differ: %v", err))
return
}
w.Write(b.Bytes())
}
func findHTMLDiffer() (string, error) {
wd, err := os.Getwd()
if err != nil {
return "", err
}
differ := path.Join(wd, "htmldiff.pl")
if _, err := os.Stat(differ); err == nil {
return differ, nil
}
return "", fmt.Errorf("unable to find htmldiff.pl")
}
func listPulls(w http.ResponseWriter, req *http.Request) {
resp, err := http.Get(pullsPrefix)
if err != nil {
writeError(w, 500, err)
return
}
defer resp.Body.Close()
dec := json.NewDecoder(resp.Body)
var pulls []PullRequest
if err := dec.Decode(&pulls); err != nil {
writeError(w, 500, err)
return
}
if len(pulls) == 0 {
io.WriteString(w, "No pull requests found")
return
}
s := "<body><ul>"
for _, pull := range pulls {
s += fmt.Sprintf(`<li>%d: <a href="%s">%s</a>: <a href="%s">%s</a>: <a href="spec/%d">spec</a> <a href="diff/html/%d">spec diff</a> <a href="diff/rst/%d">rst diff</a></li>`,
pull.Number, pull.User.HTMLURL, pull.User.Login, pull.HTMLURL, pull.Title, pull.Number, pull.Number, pull.Number)
}
s += `</ul><div><a href="spec/head">View the spec at head</a></div></body>`
io.WriteString(w, s)
}
func ignoreExitCodeOne(err error) error {
if err == nil {
return err
}
if exiterr, ok := err.(*exec.ExitError); ok {
if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
if status.ExitStatus() == 1 {
return nil
}
}
}
return err
}
func main() {
flag.Parse()
// It would be great to read this from github, but there's no convenient way to do so.
// Most of these memberships are "private", so would require some kind of auth.
allowedMembers = map[string]bool{
"dbkr": true,
"erikjohnston": true,
"illicitonion": true,
"Kegsay": true,
"NegativeMjark": true,
"richvdh": true,
"leonerd": true,
}
if err := initCache(); err != nil {
log.Fatal(err)
}
rand.Seed(time.Now().Unix())
masterCloneDir, err := gitClone(matrixDocCloneURL, false)
if err != nil {
log.Fatal(err)
}
s := server{masterCloneDir}
http.HandleFunc("/spec/", forceHTML(s.serveSpec))
http.HandleFunc("/diff/rst/", forceHTML(s.serveRSTDiff))
http.HandleFunc("/diff/html/", forceHTML(s.serveHTMLDiff))
http.HandleFunc("/healthz", serveText("ok"))
http.HandleFunc("/", listPulls)
fmt.Printf("Listening on port %d\n", *port)
log.Fatal(http.ListenAndServe(fmt.Sprintf(":%d", *port), nil))
}
func forceHTML(h func(w http.ResponseWriter, req *http.Request)) func(w http.ResponseWriter, req *http.Request) {
return func(w http.ResponseWriter, req *http.Request) {
w.Header().Set("Content-Type", "text/html")
h(w, req)
}
}
func serveText(s string) func(http.ResponseWriter, *http.Request) {
return func(w http.ResponseWriter, req *http.Request) {
io.WriteString(w, s)
}
}
func initCache() error {
c, err := lru.New(50) // Evict after 50 entries (i.e. 50 sha1s)
specCache = c
return err
}