main.go (4803B)
1 /* See LICENSE for license details. */ 2 package main 3 4 import ( 5 "bufio" 6 "flag" 7 "fmt" 8 "log" 9 "os" 10 "regexp" 11 "strings" 12 "unicode/utf8" 13 14 "aozora2fmt" 15 ) 16 17 type OutFmt struct { 18 ruby string /* Ruby output format */ 19 hdr string /* Header format */ 20 shdr string /* Subheader format */ 21 sshdr string /* Subsubheader format */ 22 pb string /* Page Break text */ 23 } 24 25 func usage() { 26 fmt.Fprintf(os.Stderr, "usage: %s [-d] [-f format] file\n", os.Args[0]) 27 flag.PrintDefaults() 28 } 29 30 func get_outfmt(fmt string) *OutFmt { 31 of := new(OutFmt) 32 33 switch fmt { 34 case "tex": 35 of.ruby = "\\ruby{%s}{%s}" 36 of.hdr = "\\chapter{%s}" 37 of.shdr = "\\section*{%s}" 38 of.sshdr = "\\subsection*{%s}" 39 of.pb = "\\newpage" 40 case "md": 41 of.ruby = "<ruby>%s<rp>《</rp><rt>%s</rt><rp>》</rp></ruby>" 42 of.hdr = "# %s" 43 of.shdr = "## %s" 44 of.sshdr = "### %s" 45 of.pb = "<div style='break-after:always'></div>" 46 case "plain": 47 of.ruby = "[%s:%s]" 48 of.hdr = "%s" 49 of.shdr = "%s" 50 of.sshdr = "%s" 51 of.pb = "" 52 } 53 54 return of 55 } 56 57 func replace_jis(str string) string { 58 exp := regexp.MustCompile(`※[#[^」]+」、([^]]+)]`) 59 60 for _, matches := range exp.FindAllStringSubmatch(str, -1) { 61 p, m, k, t := 0, 0, 0, 0 62 n, _ := fmt.Sscanf(matches[1], `第%01d水準%01d-%02d-%02d`, &p, &m, &k, &t) 63 64 if n != 4 { 65 /* the same character appeared multiple times in str */ 66 continue 67 } 68 69 str = strings.Replace(str, matches[0], aozora2fmt.Jis2Utf8(m, k, t), -1) 70 } 71 72 return str 73 } 74 75 func replace_ruby(str string, of *OutFmt) string { 76 kanji := `\x{3400}-\x{4DBF}` + /* CJK Unified Ideographs Extension A */ 77 `\x{4E00}-\x{9FFF}` + /* CJK Unified Ideographs */ 78 `\x{F900}-\x{FAFF}` + /* CJK Compatibility Ideographs */ 79 `\x{20000}-\x{2FA1F}` + /* CJK Unified Ideographs Extension B - F, Supplement */ 80 `〆〻〇々ヶ` 81 ruby_exp := regexp.MustCompile(`[|]?([` + kanji + `]+)《([^》]+)》`) 82 for _, matches := range ruby_exp.FindAllStringSubmatch(str, -1) { 83 replacement := fmt.Sprintf(of.ruby, matches[1], matches[2]) 84 str = strings.Replace(str, matches[0], replacement, -1) 85 } 86 87 bouten_exp := regexp.MustCompile(`[#「([^」]+)」に傍点]`) 88 for _, matches := range bouten_exp.FindAllStringSubmatch(str, -1) { 89 bouten := strings.Repeat("﹅", utf8.RuneCountInString(matches[1])) 90 replacement := fmt.Sprintf(of.ruby, matches[1], bouten) 91 str = strings.Replace(str, matches[1] + matches[0], replacement, -1) 92 } 93 94 return str 95 } 96 97 func replace_accents(str string) string { 98 exp := regexp.MustCompile(`〔([^〕]+)〕`) 99 100 for _, matches := range exp.FindAllStringSubmatch(str, -1) { 101 str = strings.Replace(str, matches[0], matches[1], -1) 102 103 m := aozora2fmt.AccentMap() 104 for key := range m { 105 str = strings.ReplaceAll(str, key, m[key]) 106 } 107 } 108 109 return str 110 } 111 112 func replace_hdrs(str string, of *OutFmt) string { 113 exp := regexp.MustCompile(`\n\n[[^[]+[#「([^」]+)」は([大中小])見出し]\n\n\n`) 114 slices := exp.FindAllStringSubmatch(str, -1) 115 if slices == nil { 116 exp = regexp.MustCompile(`\n\n\n([^\n]+)\n\n\n`) 117 for _, matches := range exp.FindAllStringSubmatch(str, -1) { 118 replacement := "\n" + fmt.Sprintf(of.hdr, matches[1]) + "\n" 119 str = strings.Replace(str, matches[0], replacement, -1) 120 } 121 return str 122 } 123 124 for _, matches := range slices { 125 var replacement string 126 switch matches[2] { 127 case "大": 128 replacement = fmt.Sprintf(of.hdr, matches[1]) 129 case "中": 130 replacement = fmt.Sprintf(of.shdr, matches[1]) 131 case "小": 132 replacement = fmt.Sprintf(of.sshdr, matches[1]) 133 default: 134 log.Printf("bad hdr: %s\n", matches[0]) 135 replacement = matches[1] 136 } 137 str = strings.Replace(str, matches[0], replacement + "\n", -1) 138 } 139 140 return str 141 } 142 143 func trim_info(str string) string { 144 delim := "\n" + strings.Repeat("-", 55) + "\n" 145 146 slices := strings.Split(str, delim) 147 148 return strings.Join([]string{slices[0], slices[2]}, "") 149 } 150 151 func parse(file string, of *OutFmt, debug bool) string { 152 f, err := os.Open(file) 153 defer f.Close() 154 if err != nil { 155 log.Fatal(err) 156 } 157 158 var lines []string 159 r := bufio.NewScanner(f) 160 for r.Scan() { 161 line := strings.Trim(r.Text(), " ") 162 line = replace_jis(line) 163 line = replace_ruby(line, of) 164 line = replace_accents(line) 165 lines = append(lines, line) 166 } 167 168 out := strings.Join(lines, "\n\n"); 169 out = replace_hdrs(out, of) 170 out = strings.Replace(out, "[#改ページ]", of.pb, -1) 171 172 if (debug == false) { 173 out = trim_info(out) 174 } 175 176 return out 177 } 178 179 func main() { 180 var ( 181 debug = flag.Bool("d", false, "debug mode") 182 format = flag.String("f", "plain", "output format [plain|md|tex]") 183 ) 184 185 flag.Usage = usage 186 flag.Parse() 187 188 if flag.NArg() != 1 { 189 usage() 190 os.Exit(1) 191 } 192 193 log.SetFlags(log.Lshortfile) 194 195 of := get_outfmt(*format) 196 out := parse(flag.Arg(0), of, *debug) 197 198 fmt.Printf("%s\n", out) 199 }