#!/usr/bin/env texlua
-- Set the kpathsea program name to "luatex" before any module is required
-- (presumably so the require calls below can be resolved through the TeX
-- tree -- confirm against the LuaTeX manual).
kpse.set_program_name("luatex")
-- ctanbib -- export ctan entries to bib format
-- Copyright: Michal Hoftich <michal.h21@gmail.com> (2014-2021)
--
-- This work may be distributed and/or modified under the
-- conditions of the LaTeX Project Public License, either version 1.3
-- of this license or (at your option) any later version.
-- The latest version of this license is in
--   http://www.latex-project.org/lppl.txt
-- and version 1.3 or later is part of all distributions of LaTeX
-- version 2005/12/01 or later.
-- 
-- This work has the LPPL maintenance status `maintained'.
-- 
-- The Current Maintainer of this work is Michal Hoftich

local lapp = require "lapp-mk4"

-- Usage text. It is handed to lapp, which returns the parsed arguments, and
-- it is printed again for --help and when no package name is given.
local msg = [[ctanbib - convert ctan package information to bibtex format
Usage:
ctanbib [options] name1 name2 ...
Available options:
  -c,--ctan       Use CTAN URL instead of the package homepage 
  -C,--ctanpath   Use package's CTAN path as URL
  -e,--entrytype  (default manual)  Change entry type
  -h,--help       Print this message
  -v,--version    Print version info
  ]]
local args = lapp(msg)

-- informational options terminate the script right away
if args.help then
  print(msg)
  os.exit(0)
end
if args.version then
  print("ctanbib version 0.2b")
  os.exit(0)
end

-- true when --ctan was requested: entries then point to the CTAN package page
local pkgurl = false
if args.ctan then
  pkgurl = true
end

-- entry type written after the "@"; only replaced when --entrytype differs
-- from its declared default "manual"
local bibtype = "manual"
if args.entrytype ~= "manual" then
  bibtype = args.entrytype
end




-- first positional argument; at least one package name is required
local pkgname = args[1]

if not pkgname then
  -- report the problem, show the usage text and signal failure to the caller
  print("[ctanbib] Error: missing package name")
  print(msg)
  os.exit(1)
end

-- address of the complete CTAN author list; downloaded by fetch_author_list
local authors_url = "https://ctan.org/xml/2.0/authors"
-- NOTE(review): not referenced anywhere else in this file
local license_url = "http://www.ctan.org/xml/2.0/licenses"

-- string.format template for the title field; %s receives the escaped
-- package name. Change it to get a different title scheme.
local titleformat = "The %s package"

-- Template of one bibliography entry. Every $name placeholder is replaced by
-- compile() with the field of the same name (an empty string when the field
-- is missing). The leading "@manual" is rewritten to the selected entry type
-- when the entry is printed.
local bibtexformat = [[
@manual{$package,
title = {$title},
subtitle = {$subtitle},
author = {$author},
url = {$url},
urldate = {$urldate}, 
date = {$date},
version = {$version},
}
]]

local dom = require('luaxml-domobject')

-- Download `url` with curl and parse the response as XML.
--   url          : string, address to fetch
--   verification : optional Lua pattern the response body must match
--                  (defaults to "", which matches any text)
-- Returns the parsed DOM object and the raw response text on success.
-- Returns nil, an error message and (when something was read) the raw
-- response text on failure.
local load_xml =  function(url, verification)
  verification = verification or ""

  -- The URL becomes part of a shell command line and part of it comes from
  -- the command-line arguments. It is passed inside double quotes, so refuse
  -- every character that could close the quotes or trigger an expansion
  -- inside them, as well as whitespace and control characters.
  if type(url) ~= "string" or url == "" or url:find('[%c%s"`$\\]') then
    return nil, "Invalid URL"
  end

  -- local command = io.popen("wget -qO- ".. url,"r")
  local command = io.popen('curl --ssl-no-revoke -sS "' .. url .. '"', "r")
  if not command then return nil, "Cannot open Curl" end

  local info = command:read("*all")
  command:close()

  if not info or string.len(info) == 0 or not info:match(verification) then
    return nil, "Cannot load XML", info
  end

  -- a parser failure is reported through the same nil + message convention
  -- instead of aborting the whole script
  local ok, document = pcall(dom.parse, info)
  if not ok or not document then
    return nil, "Cannot parse XML", info
  end
  return document, info
end

-- Escape the characters $ { } and \ in a field value.
--   a : string to escape; nil is treated as the empty string
-- Returns exactly one value, the escaped string. $ { } get a backslash
-- prefix, a backslash is replaced by the \textbackslash macro.
local bibtex_escape = function(a)
  local text = a or ""
  -- gsub also returns the number of replacements; keep only the string so
  -- that the count cannot leak into the caller's argument or return list
  local escaped = text:gsub("([%$%{%}%\\])", function(x)
    if x == "\\" then return "\\textbackslash  " end
    return '\\'..x
  end)
  return escaped
end

-- we need to use this method temporarily because of a bug in
-- CTAN API

-- cache of <author> elements keyed by their id attribute; filled on first use
local author_list

-- Return the table of all CTAN authors (id -> author element).
-- The list is downloaded from authors_url on the first call and reused
-- afterwards. When the download fails, the error message is printed and the
-- script exits with status 1.
local fetch_author_list = function()
  if author_list then return author_list end
  -- named `err` so that the file-level usage text `msg` is not shadowed
  local authors, err = load_xml(authors_url, "<authors")
  if not authors then
    print(err)
    -- a failed download must not be reported as success
    os.exit(1)
  end
  local list = {}
  for _, author in ipairs(authors:query_selector("author")) do
    -- save all authors under their ID; an element without an id cannot be
    -- looked up later and a nil table key would raise an error
    local id = author:get_attribute("id")
    if id then
      list[id] = author
    end
  end
  author_list = list
  return author_list
end

-- Format one author element as a BibTeX name.
--   author : element providing get_attribute("familyname") and
--            get_attribute("givenname"); may be nil
-- Returns "Family, Given" when both parts are present, "{Name}" when only
-- one part is present (treated as an organization), and nil when the
-- element is missing or carries no name at all.
local process_author = function(author)
  if not author then return nil end
  local parts = {}
  for _, key in ipairs({ "familyname", "givenname" }) do
    local value = author:get_attribute(key)
    -- an empty attribute is handled like a missing one, otherwise the result
    -- would contain a dangling ", "
    if value and value ~= "" then
      parts[#parts + 1] = value
    end
  end
  if #parts == 1 then
    -- the author is an organization: braces keep the name in one piece
    return "{" .. parts[1] .. "}"
  elseif #parts > 1 then
    return table.concat(parts, ", ")
  end
  return nil -- no author
end

-- Get the formatted name of one author reference.
--   author : element from the package record; read through get_attribute
-- Returns the name produced by process_author, or "{}" when no name can be
-- found.
local get_author = function(author)
  local name = process_author(author)
  if name then return name end
  -- if the package XML doesn't contain the author name, we need to fetch the
  -- authors list from CTAN and find it there
  local id = author:get_attribute("id")
  -- without an id there is nothing to look up, so the download is skipped
  local known = id and fetch_author_list()[id]
  if known then
    return process_author(known) or "{}"
  end
  -- unknown author: return an empty group
  return "{}"
end

-- Build the author field from a list of author elements.
--   a : sequence of elements, each one is passed to get_author
-- Returns the individual names joined by " and " (an empty string for an
-- empty list).
local get_authors = function(a)
  local names = {}
  for _, element in ipairs(a) do
    names[#names + 1] = get_author(element)
  end
  return table.concat(names, " and ")
end

-- Build the title field of an entry.
--   record : parsed package record; its first <name> element supplies the title
--   name   : optional package name used when the record has no <name>
--            element; when omitted, the file-level pkgname (the first
--            package given on the command line) is used
-- Returns titleformat filled in with the escaped title.
local get_title = function(record, name)
  local element = record:query_selector("name")[1]
  local title
  if element then
    title = element:get_text()
    -- upper-case the first byte of the name
    title = title:gsub("^(.)", function(a) return unicode.utf8.upper(a) end)
  else
    title = name or pkgname
  end
  -- the parentheses keep only the first value returned by bibtex_escape
  return string.format(titleformat, (bibtex_escape(title)))
end

-- Check that a parsed response describes a package.
--   record : parsed document, or nil when loading failed
-- Returns true when the record contains at least one <name> element,
-- false otherwise.
local function verify_record(record)
  if record then
    local names = record:query_selector("name")
    return #names > 0
  end
  return false
end

-- Select the URL of a package.
--   record  : parsed package record
--   pkgname : package name, used to build the fallback address
-- Returns the href attribute of the first <home> element; when the record
-- has no <home> element, the CTAN page of the package is returned.
local get_url = function(record, pkgname)
  local homepage = record:query_selector("home")[1]
  if not homepage then
    return "https://ctan.org/pkg/" .. pkgname
  end
  return homepage:get_attribute("href")
end

-- Read the package caption; it is used as the subtitle of the entry.
--   record : parsed package record
-- Returns the escaped text of the first <caption> element, or nil when the
-- record has none.
local get_caption = function(record)
  local first = record:query_selector("caption")[1]
  if not first then
    return nil
  end
  return bibtex_escape(first:get_text())
end

-- Read version information from a package record.
--   record : parsed package record
-- Returns the "number" and "date" attributes of the first <version>
-- element; returns nothing when the record has no <version> element.
local get_version = function(record)
  local element = record:query_selector("version")[1]
  if not element then
    return
  end
  local number = element:get_attribute("number")
  return number, element:get_attribute("date")
end

-- Build the address of a package page on CTAN.
--   record  : not used; kept so the signature matches the other URL helpers
--   pkgname : package name appended to the base address
local function ctan_url(record, pkgname)
  local base = "https://ctan.org/pkg/"
  return base .. pkgname
end

-- Return the CTAN path of a package for use as its URL.
--   record : parsed package record
--   name   : optional package name for the fallback URL; when omitted, the
--            file-level pkgname (the first package given on the command
--            line) is used
-- Returns the "path" attribute of the first <ctan> element. Some packages
-- don't contain the CTAN path; the result of get_url is returned for them.
local ctan_path = function(record, name)
  local ctan = record:query_selector("ctan")[1]
  local path = ctan and ctan:get_attribute("path")
  if path then return path end
  -- get_url needs a package name to build its own fallback address; calling
  -- it without one raises an error when the record has no homepage either
  return get_url(record, name or pkgname)
end


-- Fill a template with values.
--   template : string containing $name placeholders (lower-case letters only)
--   records  : table mapping placeholder names to their replacement values
-- Returns exactly one value: the template with every placeholder replaced by
-- its value, or by an empty string when the value is missing.
local compile = function(template, records)
  -- gsub also returns the number of replacements; only the text is returned
  local filled = template:gsub("$([a-z]+)", function(key)
    return records[key] or ""
  end)
  return filled
end

-- Print one bibliography entry for every package named on the command line.
for _, pkgname in ipairs(args) do
  -- The name becomes part of a URL that is handed to an external command:
  -- percent-encode every byte outside the unreserved URL characters.
  local encoded = pkgname:gsub("[^A-Za-z0-9%-%._~]", function(c)
    return string.format("%%%02X", c:byte())
  end)
  local url = "https://ctan.org/xml/2.0/pkg/" .. encoded .. "?author-name=true"
  local record = load_xml(url, "%<entry")

  if not verify_record(record) then
    -- report the miss as a comment in the output and carry on with the
    -- remaining packages instead of dropping all of them
    print("% Cannot find entry for package "..pkgname)
  else
    local e = {}

    e.author = get_authors(record:query_selector("authorref"))
    e.package = pkgname
    e.title = get_title(record, pkgname)
    e.subtitle = get_caption(record)
    e.url = get_url(record, pkgname)
    -- --ctan selects the CTAN package page, --ctanpath the CTAN path
    if pkgurl then
      e.url = ctan_url(record, pkgname)
    elseif args.ctanpath then
      e.url = ctan_path(record, pkgname)
    end
    e.version, e.date = get_version(record)
    e.urldate = os.date("%Y-%m-%d")

    local result = compile(bibtexformat, e)
    -- update the bibliography type; a function is used as the replacement so
    -- that a "%" in the requested type is not read as a gsub escape
    result = result:gsub("^@manual", function() return "@" .. bibtype end)

    print(result)
  end
end
