def timestamp_to_iso8601(timestamp):
    """Convert a millisecond Unix timestamp to an ISO 8601 UTC string.

    Args:
        timestamp: Milliseconds since the Unix epoch, as an int or anything
            ``int()`` accepts (e.g. a numeric string).

    Returns:
        A string formatted as ``YYYY-MM-DDTHH:MM:SS.ffffffZ``. If
        ``timestamp`` cannot be converted to an integer (``None``, a
        non-numeric string, ...), it is returned unchanged.
    """
    from datetime import timezone

    try:
        milliseconds = int(timestamp)
    except (TypeError, ValueError):
        # Not a numeric timestamp: hand the value back untouched.
        return timestamp

    # The format below ends in a literal 'Z' (UTC designator), so the
    # conversion must be done in UTC rather than in the machine's local zone.
    dt_object = datetime.fromtimestamp(milliseconds / 1000, tz=timezone.utc)

    # Format the datetime object to ISO 8601 format.
    iso8601_format = dt_object.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
    return iso8601_format
# Read the whole input file into memory as a list of lines (newline
# characters included); the file is closed as soon as the block exits.
with open('comment.0.jsonl', 'rt', encoding='utf8') as f:
    lines = f.readlines()
def write_data(data):
    """Write ``data`` to ``Comment.json`` in the current directory.

    The file is opened in ``'wt+'`` mode, so it is created if missing and
    truncated on every call: each call replaces the previous contents.

    Args:
        data: The text to write, encoded as UTF-8.
    """
    with open('Comment.json', 'wt+', encoding='utf8') as f:
        f.write(data)
exist_data = []  # nick/link/comment keys of the records kept so far
finaldata = []   # converted records, in input order

# write_data('#filetype:JSON-streaming {"type":"Class","class":"Comment"}\n')
for line in lines:
    # Skip blank lines and '#'-prefixed lines; neither is a JSON record.
    if not line.strip() or line.startswith('#'):
        continue
    data = json.loads(line)

    # Records sharing the same nick, link and comment are duplicates;
    # only the first occurrence is kept.
    dedup_key = {
        "nick": data.get('nick'),
        "link": data.get('link'),
        'comment': data.get('comment'),
    }
    if dedup_key in exist_data:
        continue

    created = data.get('created')
    if created:
        # Convert once and reuse the result for all three date fields.
        created_iso = timestamp_to_iso8601(created)
        data['insertedAt'] = {"__type": "Date", "iso": created_iso}  # Valine's time index
        data['createdAt'] = created_iso  # built-in LeanCloud field
        data['updatedAt'] = created_iso  # built-in LeanCloud field

    # Drop these fields whenever present. pop() also removes falsy values
    # (e.g. "top": false), which a truthiness check would leave behind.
    for field in ('top', 'master', 'uid', 'created', 'mailMd5'):
        data.pop(field, None)

    finaldata.append(data)
    exist_data.append(dedup_key)