Browse Source

Removing old WordPress import files

pull/1/head
Annika Backstrom 1 year ago
parent
commit
f46a25acff

+ 0
- 63
sixohthree-import/add-fields-from-xml.py View File

@@ -1,63 +0,0 @@
import os
import sys
import xml.etree.ElementTree as ET

# Namespace prefix -> URI pairs registered with ElementTree.
NS_MAP = {
    'excerpt': 'http://wordpress.org/export/1.2/excerpt/',
}

# items() (not iteritems()) so the script runs on Python 2 and Python 3.
for prefix, uri in NS_MAP.items():
    ET.register_namespace(prefix, uri)

tree = ET.parse('../sixohthree-import/sixohthreecom.wordpress.2013-01-15.xml')
root = tree.getroot()

# For every exported post, prepend "Summary" and "WordPress-Post-Format"
# header lines to a ".tmp" copy of the matching file under ../content/.
for item in root.iter('item'):
    excerpt = item.find('{http://wordpress.org/export/1.2/excerpt/}encoded')
    post_format = item.find('category[@domain="post_format"]')
    post_name = item.find('{http://wordpress.org/export/1.2/}post_name').text
    post_id = item.find('{http://wordpress.org/export/1.2/}post_id').text
    post_type = item.find('{http://wordpress.org/export/1.2/}post_type').text

    if post_type != 'post':
        sys.stderr.write("Skipping [%s] %s (%s)\n" % (post_type, post_name, post_id))
        continue

    # Compare against None explicitly: an Element without children is falsy,
    # so a plain truth test would treat a found element as missing.
    excerpt_text = excerpt.text if excerpt is not None else None
    post_format_text = post_format.text if post_format is not None else None

    # Nothing to add for this post. The original compared the post_format
    # *element* (not its text) and only matched None, so posts with no
    # excerpt element at all still produced an unchanged ".tmp" copy.
    if not excerpt_text and not post_format_text:
        continue

    filename = "../content/%s-%s.md" % (post_id, post_name)

    try:
        with open(filename, 'r') as f:
            contents = f.read()
    except IOError:
        sys.stderr.write("Cannot find file %s\n" % filename)
        continue

    # NOTE(review): on Python 2 a non-ASCII excerpt may fail to encode when
    # written here -- confirm against the real export before relying on it.
    with open(filename + '.tmp', 'w') as tmp:
        if excerpt_text:
            tmp.write('Summary: %s\n' % excerpt_text)

        if post_format_text:
            tmp.write('WordPress-Post-Format: %s\n' % post_format_text)

        tmp.write(contents)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Done.")

+ 0
- 16
sixohthree-import/addslug.py View File

@@ -1,16 +0,0 @@
#!/usr/bin/python2.7

import sys

# Every command-line argument is a file that gets a "Slug:" header prepended
# in a sibling "<name>.tmp" copy; the original file is left untouched.
filenames = sys.argv[1:]

for filename in filenames:
    # Context managers close both handles promptly; the original never
    # closed either file.
    with open(filename, 'r') as f:
        text = f.read()

    # Drop the last three characters of the name and turn only the first
    # hyphen into a slash, e.g. "123-some-post.md" -> "123/some-post/".
    slug = filename[:-3].replace('-', '/', 1) + '/'

    with open(filename + '.tmp', 'w') as tmp:
        tmp.write('Slug: %s\n' % slug)
        tmp.write(text)

+ 0
- 53
sixohthree-import/directorize.py View File

@@ -1,53 +0,0 @@
#!/usr/bin/python

import os
import re

# Header names as written in the files -> keys used in the returned dict.
MAP = {
    "WordPress Post ID": "post_id"
}


def read_headers(filename):
    """Read the leading "Name: value" header lines of a ``.md`` file.

    Reading stops at the first empty line. Only headers listed in ``MAP``
    are kept, stored under their mapped key.

    Args:
        filename: Path of the file to inspect.

    Returns:
        ``False`` when ``filename`` does not end in ``.md``; otherwise a dict
        of the mapped headers (empty when none were found).

    Raises:
        IOError: If the file cannot be opened.
    """
    if filename[-3:] != '.md':
        return False

    result = {}

    with open(filename, 'r') as f:
        for line in f:
            # Strip only the line terminator. Slicing off the last character
            # ate real data when the final line had no trailing newline.
            line = line.rstrip('\r\n')

            if line == "":
                break

            # partition() never raises, unlike unpacking split(": ", 1) on a
            # line that has no separator; such lines are simply skipped.
            header, sep, value = line.partition(": ")
            if not sep:
                continue

            if header in MAP:
                result[MAP[header]] = value

    return result

# Move every "*.md" file in the current directory into a directory named
# after its post id ("<post_id>/<filename>").
for dirname, dirnames, filenames in os.walk('.'):
    # Every path below is relative to the current directory, so only the top
    # level can be handled correctly. Emptying dirnames in place stops
    # os.walk from descending; previously a rerun walked into the post-id
    # directories and failed to open the bare filenames found there.
    del dirnames[:]

    for filename in filenames:
        headers = read_headers(filename)

        # False (not a .md file) and {} (no post id header) are both skipped.
        if not headers:
            print("BAD %s" % filename)
            continue

        post_dir = headers['post_id']

        if not os.path.isdir(post_dir):
            os.mkdir(post_dir)

        newname = os.path.join(post_dir, filename)

        # The destination already holds a file of this name: keep it and
        # drop the top-level copy.
        if os.path.isfile(newname):
            os.unlink(filename)
            print("EXISTS %s" % newname)
            continue

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%s -> %s" % (filename, newname))

        # Hard-link into place, then remove the old name.
        os.link(filename, newname)
        os.unlink(filename)

+ 0
- 1873
sixohthree-import/sitemap.xml
File diff suppressed because it is too large
View File


+ 0
- 39482
sixohthree-import/sixohthreecom.wordpress.2013-01-15.xml
File diff suppressed because it is too large
View File


+ 0
- 74
sixohthree-import/tagmerge.py View File

@@ -1,74 +0,0 @@
#!/usr/bin/python2.7

import markdown
import os

from markdown.extensions.meta import META_RE, META_MORE_RE

MetaPreprocessor = markdown.extensions.meta.MetaPreprocessor

# Meta keys whose output spelling is not a plain capitalization.
MAP = {
    'wordpress-post-id': 'WordPress-Post-ID',
    'wordpress-post-type': 'WordPress-Post-Type',
    'wordpress-post-format': 'WordPress-Post-Format',
}

# Header names in the order they are sorted into when written back out.
ORDER = [
    'Title',
    'Slug',
    'Summary',
    'Date',
    'Author',
    'Tags',
    'WordPress-Post-ID',
    'WordPress-Post-Type',
    'WordPress-Post-Format',
]


def headerfix(s):
    """Return the output spelling of the header name *s*.

    Names listed in ``MAP`` use their mapped spelling; every other name is
    returned capitalized.
    """
    if s in MAP:
        return MAP[s]
    return s.capitalize()

def parse_file(f):
    """Rewrite the meta-data header block of a Markdown document.

    The leading meta-data is parsed with Python-Markdown's MetaPreprocessor,
    the ``category`` and ``tags`` entries are merged into a single ``tags``
    entry, header names are normalized with ``headerfix`` and the headers are
    sorted into ``ORDER``.

    Args:
        f: Readable file object holding the document text.

    Returns:
        The rebuilt headers, a blank line, then the remaining document text.
    """
    lines = f.read().split("\n")

    # Give the preprocessor the Markdown *instance*. The original passed the
    # ``markdown`` module, so the parsed meta-data was stored on the module
    # object and the instance created here was never used.
    md = markdown.Markdown()
    lines = MetaPreprocessor(md).run(lines)
    text = "\n".join(lines)
    meta = md.Meta

    # Fold categories and tags together, categories first.
    tags = []
    for key in ('category', 'tags'):
        tags += meta.pop(key, [])

    if tags:
        meta['tags'] = [", ".join(tags)]

    def sort_key(pair):
        # Headers missing from ORDER used to raise ValueError; they now sort
        # after the known ones, alphabetically among themselves.
        name = pair[0]
        try:
            return (ORDER.index(name), name)
        except ValueError:
            return (len(ORDER), name)

    # Only the first line of each value is written back (v[0]).
    headers = sorted(((headerfix(k), v[0]) for k, v in meta.items()),
                     key=sort_key)
    header_text = "\n".join("{0}: {1}".format(k, v) for k, v in headers)

    return "%s\n\n%s" % (header_text, text)

# Rewrite the headers of every "*.md" file below the current directory,
# in place.
for dirname, dirnames, filenames in os.walk('.'):
    for filename in filenames:
        if filename[-3:] != '.md':
            continue

        # os.walk yields bare file names; join with dirname so files inside
        # subdirectories are opened too and do not fail as missing.
        path = os.path.join(dirname, filename)

        # Read and transform fully before reopening for writing, so a failure
        # in parse_file leaves the file untouched.
        with open(path, 'r') as f:
            fixed = parse_file(f)

        with open(path, 'w') as f:
            f.write(fixed)

Loading…
Cancel
Save