Commit: 9171f5a232a2aa4a49c732226946eb28be35b37c
Parent: dd2f3a6212170fc29b8b22d96e1ca91217cf018f
Author: Randy Palamar
Date:   Mon,  1 Aug 2022 11:45:29 -0600
move maps to separate file
Diffstat:
| M | Makefile | | | 4 | ++-- | 
| A | aozora/main.go | | | 208 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | 
| D | aozora2fmt.go | | | 363 | ------------------------------------------------------------------------------- | 
| A | go.mod | | | 3 | +++ | 
| A | maps.go | | | 158 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | 
5 files changed, 371 insertions(+), 365 deletions(-)
diff --git a/Makefile b/Makefile
@@ -7,8 +7,8 @@ BINS = aozora2fmt
 
 default: $(BINS)
 
-aozora2fmt: aozora2fmt.go
-	go build -ldflags "$(LDFLAGS)" $@.go
+aozora2fmt: aozora/main.go
+	go build -ldflags "$(LDFLAGS)" -o $@ aozora/main.go
 
 install: $(BINS)
 	mkdir -p $(PREFIX)/bin
diff --git a/aozora/main.go b/aozora/main.go
@@ -0,0 +1,208 @@
+/* See LICENSE for license details. */
+package main
+
+import (
+	"bufio"
+	"flag"
+	"fmt"
+	"log"
+	"os"
+	"regexp"
+	"strconv"
+	"strings"
+	"unicode/utf8"
+
+	"aozora2fmt"
+)
+
+type OutFmt struct { /* Templates for one output format; ruby/hdr/shdr/sshdr are passed to fmt.Sprintf, pb is inserted verbatim */
+	ruby  string /* Ruby output format */
+	hdr   string /* Header format */
+	shdr  string /* Subheader format */
+	sshdr string /* Subsubheader format */
+	pb    string /* Page Break text */
+}
+
+func usage() { /* Write the usage synopsis to stderr, then let the flag package print the flag defaults */
+	fmt.Fprintf(os.Stderr, "usage: %s [-d] [-f format] file\n", os.Args[0])
+	flag.PrintDefaults()
+}
+
+func get_outfmt(fmt string) *OutFmt { /* Return the OutFmt templates for the named format ("tex", "md" or "plain") */
+	of := new(OutFmt) /* starts zeroed: every template is "" */
+
+	switch fmt { /* the parameter fmt shadows the fmt package inside this function */
+	case "tex":
+		of.ruby  = "\\ruby{%s}{%s}"
+		of.hdr   = "\\chapter{%s}"
+		of.shdr  = "\\section*{%s}"
+		of.sshdr = "\\subsection*{%s}"
+		of.pb    = "\\newpage"
+	case "md":
+		of.ruby  = "<ruby>%s<rp>《</rp><rt>%s</rt><rp>》</rp></ruby>"
+		of.hdr   = "# %s"
+		of.shdr  = "## %s"
+		of.sshdr = "### %s"
+		of.pb    = "<div style='break-after:always'></div>"
+	case "plain":
+		of.ruby  = "[%s:%s]"
+		of.hdr   = "%s"
+		of.shdr  = "%s"
+		of.sshdr = "%s"
+		of.pb    = ""
+	} /* no default case: an unrecognized name leaves all fields empty */
+
+	return of
+}
+
+func replace_jis(str string) string { /* Replace each ※[#…] annotation in str with the character aozora2fmt.JisMap() holds for its code */
+	exp := regexp.MustCompile(`※[#([^]]+)]`) /* NOTE(review): brackets are presumably full-width (［＃…］) upstream; with ASCII brackets as shown this pattern has an unbalanced ')' - confirm */
+
+	for _, matches := range exp.FindAllStringSubmatch(str, -1) {
+		sub_exp := regexp.MustCompile(`第(\d)水準(\d)-(\d\d)-(\d\d)`) /* captures the four digit groups of a 第N水準N-NN-NN code; NOTE(review): recompiled on every iteration */
+	
+		nums := sub_exp.FindStringSubmatch(str) /* searches all of str, not matches[0], so this is the first code still present in str */
+		if nums == nil {
+			/* the same character appeared multiple times in str */
+			continue
+		}
+		num, _ := strconv.Atoi(nums[1] + nums[2] + nums[3] + nums[4]) /* digit groups concatenated into the map key; Atoi error ignored, the groups are \d-only */
+
+		m := aozora2fmt.JisMap()
+		replacement, ok := m[num]
+		if !ok {
+			log.Printf("jis code not implemented: %d: %s\n", num, matches[0])
+			continue /* annotation is left in str unchanged */
+		}
+
+		str = strings.Replace(str, matches[0], replacement, -1) /* -1: replace every occurrence of this annotation */
+	}
+
+	return str
+}
+
+func replace_ruby(str string, of *OutFmt) string { /* Rewrite base《reading》 ruby and 「text」に傍点 bouten annotations in str through the of.ruby template */
+	kanji := `\x{3400}-\x{4DBF}` +   /* CJK Unified Ideographs Extension A */
+		 `\x{4E00}-\x{9FFF}` +   /* CJK Unified Ideographs */
+		 `\x{F900}-\x{FAFF}` +   /* CJK Compatibility Ideographs */
+		 `\x{20000}-\x{2FA1F}` + /* CJK Unified Ideographs Extension B - F, Supplement */
+		 `〆〻〇々ヶ`
+	ruby_exp := regexp.MustCompile(`[|]?([` + kanji + `]+)《([^》]+)》`) /* optional leading | marker, a run of kanji (group 1), then 《reading》 (group 2) */
+	for _, matches := range ruby_exp.FindAllStringSubmatch(str, -1) {
+		replacement := fmt.Sprintf(of.ruby, matches[1], matches[2])
+		str = strings.Replace(str, matches[0], replacement, -1) /* -1: every identical occurrence is rewritten */
+	}
+
+	bouten_exp := regexp.MustCompile(`[#「([^」]+)」に傍点]`) /* group 1 is the quoted text being marked; NOTE(review): with ASCII [ ] as shown this pattern has an unbalanced ')'; presumably full-width ［＃…］ upstream - confirm */
+	for _, matches := range bouten_exp.FindAllStringSubmatch(str, -1) {
+		bouten := strings.Repeat("﹅", utf8.RuneCountInString(matches[1])) /* one ﹅ per rune of the marked text */
+		replacement := fmt.Sprintf(of.ruby, matches[1], bouten)
+		str = strings.Replace(str, matches[1] + matches[0], replacement, -1) /* the marked text is expected immediately before its annotation; both are replaced together */
+	}
+
+	return str
+}
+
+func replace_accents(str string) string { /* Strip 〔…〕 wrappers from str and convert accent notations using aozora2fmt.AccentMap() */
+	exp := regexp.MustCompile(`〔([^〕]+)〕`)
+	
+	for _, matches := range exp.FindAllStringSubmatch(str, -1) {
+		str = strings.Replace(str, matches[0], matches[1], -1) /* drop the brackets, keep the inner text */
+
+		m := aozora2fmt.AccentMap()
+		for key := range m { /* map iteration order is unspecified */
+			str = strings.ReplaceAll(str, key, m[key]) /* applied to all of str, not only the bracketed text */
+		}
+	}
+
+	return str
+}
+
+func replace_hdrs(str string, of *OutFmt) string { /* Convert heading annotations in str to of.hdr/of.shdr/of.sshdr output; with no annotations, fall back to treating isolated lines as headers */
+	exp := regexp.MustCompile(`\n\n[[^[]+[#「([^」]+)」は([大中小])見出し]\n\n\n`) /* group 1: heading text, group 2: size 大/中/小; NOTE(review): with ASCII [ ] as shown this pattern has an unbalanced ')'; presumably full-width ［＃…］ upstream - confirm */
+	slices := exp.FindAllStringSubmatch(str, -1)
+	if slices == nil { /* no annotated headings anywhere in str */
+		exp = regexp.MustCompile(`\n\n\n([^\n]+)\n\n\n`) /* fallback: a single line with three newlines on each side */
+		for _, matches := range exp.FindAllStringSubmatch(str, -1) {
+			replacement := "\n" + fmt.Sprintf(of.hdr, matches[1]) + "\n" /* every fallback header uses the top-level template */
+			str = strings.Replace(str, matches[0], replacement, -1)
+		}
+		return str
+	}
+
+	for _, matches := range slices {
+		var replacement string
+		switch matches[2] {
+		case "大":
+			replacement = fmt.Sprintf(of.hdr, matches[1])
+		case "中":
+			replacement = fmt.Sprintf(of.shdr, matches[1])
+		case "小":
+			replacement = fmt.Sprintf(of.sshdr, matches[1])
+		default: /* not reachable while the pattern limits group 2 to 大中小 */
+			log.Printf("bad hdr: %s\n", matches[0])
+			replacement = matches[1]
+		}
+		str = strings.Replace(str, matches[0], replacement + "\n", -1) /* the matched newlines are consumed; only one trailing "\n" is put back */
+	}
+
+	return str
+}
+
+func trim_info(str string) string { /* Remove the section enclosed by the first two delimiter lines; text after a third delimiter is also dropped */
+	delim := "\n" + strings.Repeat("-", 55) + "\n" /* delimiter: a line of exactly 55 '-' characters */
+
+	slices := strings.Split(str, delim)
+
+	return strings.Join([]string{slices[0], slices[2]}, "") /* keep the text before the first and after the second delimiter; NOTE(review): slices[2] panics when str has fewer than two delimiters */
+}
+
+func parse(file string, of *OutFmt, debug bool) string { /* Convert file: per-line jis/ruby/accent replacement, then headers and page breaks; the info block is trimmed unless debug is set */
+	f, err := os.Open(file)
+	defer f.Close() /* NOTE(review): deferred before err is checked */
+	if err != nil {
+		log.Fatal(err) /* exits the process */
+	}
+
+	var lines []string
+	r := bufio.NewScanner(f)
+	for r.Scan() {
+		line := strings.Trim(r.Text(), " ") /* strips leading/trailing ASCII spaces only */
+		line = replace_jis(line)
+		line = replace_ruby(line, of)
+		line = replace_accents(line)
+		lines = append(lines, line)
+	} /* NOTE(review): r.Err() is never checked, so a read error ends the loop silently */
+
+	out := strings.Join(lines, "\n\n"); /* input lines are separated by a blank line in the output */
+	out = replace_hdrs(out, of)
+	out = strings.Replace(out, "[#改ページ]", of.pb, -1) /* page-break marker becomes the format's page-break text */
+
+	if (debug == false) {
+		out = trim_info(out)
+	}
+
+	return out
+}
+
+func main() { /* Entry point: parse flags, require exactly one file argument, convert it and print the result to stdout */
+	var (
+		debug = flag.Bool("d", false, "debug mode")
+		format = flag.String("f", "plain", "output format [plain|md|tex]")
+	)
+
+	flag.Usage = usage
+	flag.Parse()
+
+	if flag.NArg() != 1 { /* exactly one positional argument (the input file) is required */
+		usage()
+		os.Exit(1)
+	}
+
+	log.SetFlags(log.Lshortfile) /* log lines are prefixed with file:line only */
+
+	of := get_outfmt(*format)
+	out := parse(flag.Arg(0), of, *debug) 
+
+	fmt.Printf("%s\n", out)
+}
diff --git a/aozora2fmt.go b/aozora2fmt.go
@@ -1,363 +0,0 @@
-/* See LICENSE for license details. */
-package main
-
-import (
-	"bufio"
-	"flag"
-	"fmt"
-	"log"
-	"os"
-	"regexp"
-	"strconv"
-	"strings"
-	"unicode/utf8"
-)
-
-type OutFmt struct {
-	ruby  string /* Ruby output format */
-	hdr   string /* Header format */
-	shdr  string /* Subheader format */
-	sshdr string /* Subsubheader format */
-	pb    string /* Page Break text */
-}
-
-func usage() {
-	fmt.Fprintf(os.Stderr, "usage: %s [-d] [-f format] file\n", os.Args[0])
-	flag.PrintDefaults()
-}
-
-func get_outfmt(fmt string) *OutFmt {
-	of := new(OutFmt)
-
-	switch fmt {
-	case "tex":
-		of.ruby  = "\\ruby{%s}{%s}"
-		of.hdr   = "\\chapter{%s}"
-		of.shdr  = "\\section*{%s}"
-		of.sshdr = "\\subsection*{%s}"
-		of.pb    = "\\newpage"
-	case "md":
-		of.ruby  = "<ruby>%s<rp>《</rp><rt>%s</rt><rp>》</rp></ruby>"
-		of.hdr   = "# %s"
-		of.shdr  = "## %s"
-		of.sshdr = "### %s"
-		of.pb    = "<div style='break-after:always'></div>"
-	case "plain":
-		of.ruby  = "[%s:%s]"
-		of.hdr   = "%s"
-		of.shdr  = "%s"
-		of.sshdr = "%s"
-		of.pb    = ""
-	}
-
-	return of
-}
-
-func accent_map() map[string]string {
-	/* https://web.archive.org/web/20220206093806/http://aozora.gr.jp/accent_separation.html */
-	return map[string]string {
-		"A&": "Å",
-		"A'": "Á",
-		"A:": "Ä",
-		"AE&": "Æ",
-		"A^": "Â",
-		"A_": "Ā",
-		"A`": "À",
-		"A~": "Ã",
-		"C'": "Ć",
-		"C,": "Ç",
-		"C^": "Ĉ",
-		"D/": "Đ",
-		"E'": "É",
-		"E:": "Ë",
-		"E^": "Ê",
-		"E_": "Ē",
-		"E`": "È",
-		"E~": "Ẽ",
-		"G^": "Ĝ",
-		"H^": "Ĥ",
-		"I'": "Í",
-		"I:": "Ï",
-		"I^": "Î",
-		"I_": "Ī",
-		"I`": "Ì",
-		"I~": "Ĩ",
-		"J^": "Ĵ",
-		"L'": "Ĺ",
-		"L/": "Ł",
-		"M'": "Ḿ",
-		"N'": "Ń",
-		"N`": "Ǹ",
-		"N~": "Ñ",
-		"O'": "Ó",
-		"O/": "Ø",
-		"O:": "Ö",
-		"OE&": "Œ",
-		"O^": "Ô",
-		"O_": "Ō",
-		"O`": "Ò",
-		"O~": "Õ",
-		"R'": "Ŕ",
-		"S'": "Ś",
-		"S,": "Ş",
-		"S^": "Ŝ",
-		"T,": "Ţ",
-		"U&": "Ů",
-		"U'": "Ú",
-		"U:": "Ü",
-		"U^": "Û",
-		"U_": "Ū",
-		"U`": "Ù",
-		"U~": "Ũ",
-		"Y'": "Ý",
-		"Z'": "Ź",
-		"a&": "å",
-		"a'": "á",
-		"a:": "ä",
-		"a^": "â",
-		"a_": "ā",
-		"a`": "à",
-		"ae&": "æ",
-		"a~": "ã",
-		"c'": "ć",
-		"c,": "ç",
-		"c^": "ĉ",
-		"d/": "đ",
-		"e'": "é",
-		"e:": "ë",
-		"e^": "ê",
-		"e_": "ē",
-		"e`": "è",
-		"e~": "ẽ",
-		"g^": "ĝ",
-		"h/": "ħ",
-		"h^": "ĥ",
-		"i'": "í",
-		"i/": "ɨ",
-		"i:": "ï",
-		"i^": "î",
-		"i_": "ī",
-		"i`": "ì",
-		"i~": "ĩ",
-		"j^": "ĵ",
-		"l'": "ĺ",
-		"l/": "ł",
-		"m'": "ḿ",
-		"n'": "ń",
-		"n`": "ǹ",
-		"n~": "ñ",
-		"o'": "ó",
-		"o/": "ø",
-		"o:": "ö",
-		"o^": "ô",
-		"o_": "ō",
-		"o`": "ò",
-		"oe&": "œ",
-		"o~": "õ",
-		"r'": "ŕ",
-		"s&": "ß",
-		"s'": "ś",
-		"s,": "ş",
-		"s^": "ŝ",
-		"t,": "ţ",
-		"u&": "ů",
-		"u'": "ú",
-		"u:": "ü",
-		"u^": "û",
-		"u_": "ū",
-		"u`": "ù",
-		"u~": "ũ",
-		"y'": "ý",
-		"y:": "ÿ",
-		"z'": "ź",
-	}
-}
-
-func jis_map() map[int]string {
-	/* https://kanji.jitenon.jp/ */
-	/* http://www13.plala.or.jp/bigdata/index_kanji.html */
-	return map[int]string {
-		311476: "匇",
-		311524: "噱",
-		311589: "媧",
-		318428: "彘",
-		318431: "彽",
-		318445: "怳",
-		318454: "惝",
-		318455: "惸",
-		318459: "愷",
-		318466: "戢",
-		318477: "挘",
-		318615: "橛",
-		318662: "泫",
-		318740: "炷",
-		318764: "燄",
-		318771: "犍",
-		318822: "璆",
-		318881: "眶",
-		318885: "睜",
-		319155: "蛼",
-		319239: "蹰",
-		319278: "鄢",
-		319413: "騃",
-		319484: "鼹",
-		421283: "戕",
-		428874: "譃",
-		429267: "餼",
-		429268: "饀",
-		429271: "饍",
-		429337: "魳",
-	}
-}
-
-func replace_jis(str string) string {
-	exp := regexp.MustCompile(`※[#([^]]+)]`)
-
-	for _, matches := range exp.FindAllStringSubmatch(str, -1) {
-		sub_exp := regexp.MustCompile(`第(\d)水準(\d)-(\d\d)-(\d\d)`)
-	
-		nums := sub_exp.FindStringSubmatch(str)
-		if nums == nil {
-			/* the same character appeared multiple times in str */
-			continue
-		}
-		num, _ := strconv.Atoi(nums[1] + nums[2] + nums[3] + nums[4])
-
-		m := jis_map()
-		replacement, ok := m[num]
-		if !ok {
-			log.Printf("jis code not implemented: %d: %s\n", num, matches[0])
-			continue
-		}
-
-		str = strings.Replace(str, matches[0], replacement, -1)
-	}
-
-	return str
-}
-
-func replace_ruby(str string, of *OutFmt) string {
-	kanji := `\x{3400}-\x{4DBF}` +   /* CJK Unified Ideographs Extension A */
-		 `\x{4E00}-\x{9FFF}` +   /* CJK Unified Ideographs */
-		 `\x{F900}-\x{FAFF}` +   /* CJK Compatibility Ideographs */
-		 `\x{20000}-\x{2FA1F}` + /* CJK Unified Ideographs Extension B - F, Supplement */
-		 `〆〻〇々ヶ`
-	ruby_exp := regexp.MustCompile(`[|]?([` + kanji + `]+)《([^》]+)》`)
-	for _, matches := range ruby_exp.FindAllStringSubmatch(str, -1) {
-		replacement := fmt.Sprintf(of.ruby, matches[1], matches[2])
-		str = strings.Replace(str, matches[0], replacement, -1)
-	}
-
-	bouten_exp := regexp.MustCompile(`[#「([^」]+)」に傍点]`)
-	for _, matches := range bouten_exp.FindAllStringSubmatch(str, -1) {
-		bouten := strings.Repeat("﹅", utf8.RuneCountInString(matches[1]))
-		replacement := fmt.Sprintf(of.ruby, matches[1], bouten)
-		str = strings.Replace(str, matches[1] + matches[0], replacement, -1)
-	}
-
-	return str
-}
-
-func replace_accents(str string) string {
-	exp := regexp.MustCompile(`〔([^〕]+)〕`)
-	
-	for _, matches := range exp.FindAllStringSubmatch(str, -1) {
-		str = strings.Replace(str, matches[0], matches[1], -1)
-
-		m := accent_map()
-		for key := range m {
-			str = strings.ReplaceAll(str, key, m[key])
-		}
-	}
-
-	return str
-}
-
-func replace_hdrs(str string, of *OutFmt) string {
-	exp := regexp.MustCompile(`\n\n[[^[]+[#「([^」]+)」は([大中小])見出し]\n\n\n`)
-	slices := exp.FindAllStringSubmatch(str, -1)
-	if slices == nil {
-		exp = regexp.MustCompile(`\n\n\n([^\n]+)\n\n\n`)
-		for _, matches := range exp.FindAllStringSubmatch(str, -1) {
-			replacement := "\n" + fmt.Sprintf(of.hdr, matches[1]) + "\n"
-			str = strings.Replace(str, matches[0], replacement, -1)
-		}
-		return str
-	}
-
-	for _, matches := range slices {
-		var replacement string
-		switch matches[2] {
-		case "大":
-			replacement = fmt.Sprintf(of.hdr, matches[1])
-		case "中":
-			replacement = fmt.Sprintf(of.shdr, matches[1])
-		case "小":
-			replacement = fmt.Sprintf(of.sshdr, matches[1])
-		default:
-			log.Printf("bad hdr: %s\n", matches[0])
-			replacement = matches[1]
-		}
-		str = strings.Replace(str, matches[0], replacement + "\n", -1)
-	}
-
-	return str
-}
-
-func trim_info(str string) string {
-	delim := "\n" + strings.Repeat("-", 55) + "\n"
-
-	slices := strings.Split(str, delim)
-
-	return strings.Join([]string{slices[0], slices[2]}, "")
-}
-
-func parse(file string, of *OutFmt, debug bool) string {
-	f, err := os.Open(file)
-	defer f.Close()
-	if err != nil {
-		log.Fatal(err)
-	}
-
-	var lines []string
-	r := bufio.NewScanner(f)
-	for r.Scan() {
-		line := strings.Trim(r.Text(), " ")
-		line = replace_jis(line)
-		line = replace_ruby(line, of)
-		line = replace_accents(line)
-		lines = append(lines, line)
-	}
-
-	out := strings.Join(lines, "\n\n");
-	out = replace_hdrs(out, of)
-	out = strings.Replace(out, "[#改ページ]", of.pb, -1)
-
-	if (debug == false) {
-		out = trim_info(out)
-	}
-
-	return out
-}
-
-func main() {
-	var (
-		debug = flag.Bool("d", false, "debug mode")
-		format = flag.String("f", "plain", "output format [plain|md|tex]")
-	)
-
-	flag.Usage = usage
-	flag.Parse()
-
-	if flag.NArg() != 1 {
-		usage()
-		os.Exit(1)
-	}
-
-	log.SetFlags(log.Lshortfile)
-
-	of := get_outfmt(*format)
-	out := parse(flag.Arg(0), of, *debug) 
-
-	fmt.Printf("%s\n", out)
-}
diff --git a/go.mod b/go.mod
@@ -0,0 +1,3 @@
+module aozora2fmt
+
+go 1.18
diff --git a/maps.go b/maps.go
@@ -0,0 +1,158 @@
+package aozora2fmt
+
+func AccentMap() map[string]string { /* AccentMap returns a newly built table mapping ASCII accent notations to the accented character */
+	/* https://web.archive.org/web/20220206093806/http://aozora.gr.jp/accent_separation.html */
+	return map[string]string{
+		"A&":  "Å",
+		"A'":  "Á",
+		"A:":  "Ä",
+		"AE&": "Æ",
+		"A^":  "Â",
+		"A_":  "Ā",
+		"A`":  "À",
+		"A~":  "Ã",
+		"C'":  "Ć",
+		"C,":  "Ç",
+		"C^":  "Ĉ",
+		"D/":  "Đ",
+		"E'":  "É",
+		"E:":  "Ë",
+		"E^":  "Ê",
+		"E_":  "Ē",
+		"E`":  "È",
+		"E~":  "Ẽ",
+		"G^":  "Ĝ",
+		"H^":  "Ĥ",
+		"I'":  "Í",
+		"I:":  "Ï",
+		"I^":  "Î",
+		"I_":  "Ī",
+		"I`":  "Ì",
+		"I~":  "Ĩ",
+		"J^":  "Ĵ",
+		"L'":  "Ĺ",
+		"L/":  "Ł",
+		"M'":  "Ḿ",
+		"N'":  "Ń",
+		"N`":  "Ǹ",
+		"N~":  "Ñ",
+		"O'":  "Ó",
+		"O/":  "Ø",
+		"O:":  "Ö",
+		"OE&": "Œ",
+		"O^":  "Ô",
+		"O_":  "Ō",
+		"O`":  "Ò",
+		"O~":  "Õ",
+		"R'":  "Ŕ",
+		"S'":  "Ś",
+		"S,":  "Ş",
+		"S^":  "Ŝ",
+		"T,":  "Ţ",
+		"U&":  "Ů",
+		"U'":  "Ú",
+		"U:":  "Ü",
+		"U^":  "Û",
+		"U_":  "Ū",
+		"U`":  "Ù",
+		"U~":  "Ũ",
+		"Y'":  "Ý",
+		"Z'":  "Ź",
+		"a&":  "å",
+		"a'":  "á",
+		"a:":  "ä",
+		"a^":  "â",
+		"a_":  "ā",
+		"a`":  "à",
+		"ae&": "æ",
+		"a~":  "ã",
+		"c'":  "ć",
+		"c,":  "ç",
+		"c^":  "ĉ",
+		"d/":  "đ",
+		"e'":  "é",
+		"e:":  "ë",
+		"e^":  "ê",
+		"e_":  "ē",
+		"e`":  "è",
+		"e~":  "ẽ",
+		"g^":  "ĝ",
+		"h/":  "ħ",
+		"h^":  "ĥ",
+		"i'":  "í",
+		"i/":  "ɨ",
+		"i:":  "ï",
+		"i^":  "î",
+		"i_":  "ī",
+		"i`":  "ì",
+		"i~":  "ĩ",
+		"j^":  "ĵ",
+		"l'":  "ĺ",
+		"l/":  "ł",
+		"m'":  "ḿ",
+		"n'":  "ń",
+		"n`":  "ǹ",
+		"n~":  "ñ",
+		"o'":  "ó",
+		"o/":  "ø",
+		"o:":  "ö",
+		"o^":  "ô",
+		"o_":  "ō",
+		"o`":  "ò",
+		"oe&": "œ",
+		"o~":  "õ",
+		"r'":  "ŕ",
+		"s&":  "ß",
+		"s'":  "ś",
+		"s,":  "ş",
+		"s^":  "ŝ",
+		"t,":  "ţ",
+		"u&":  "ů",
+		"u'":  "ú",
+		"u:":  "ü",
+		"u^":  "û",
+		"u_":  "ū",
+		"u`":  "ù",
+		"u~":  "ũ",
+		"y'":  "ý",
+		"y:":  "ÿ",
+		"z'":  "ź",
+	}
+}
+
+func JisMap() map[int]string { /* JisMap returns a newly built table from integer code keys to the character each code stands for */
+	/* https://kanji.jitenon.jp/ */
+	/* http://www13.plala.or.jp/bigdata/index_kanji.html */
+	return map[int]string{ /* keys presumably are the digits of a 第N水準N-NN-NN code concatenated, as replace_jis in aozora/main.go builds them - confirm */
+		311476: "匇",
+		311524: "噱",
+		311589: "媧",
+		318428: "彘",
+		318431: "彽",
+		318445: "怳",
+		318454: "惝",
+		318455: "惸",
+		318459: "愷",
+		318466: "戢",
+		318477: "挘",
+		318615: "橛",
+		318662: "泫",
+		318740: "炷",
+		318764: "燄",
+		318771: "犍",
+		318822: "璆",
+		318881: "眶",
+		318885: "睜",
+		319155: "蛼",
+		319239: "蹰",
+		319278: "鄢",
+		319413: "騃",
+		319484: "鼹",
+		421283: "戕",
+		428874: "譃",
+		429267: "餼",
+		429268: "饀",
+		429271: "饍",
+		429337: "魳",
+	}
+}