From 751ffb9de9371a1ed0efb85bbff12049419f681c Mon Sep 17 00:00:00 2001 From: Christian Schabesberger Date: Fri, 12 Feb 2016 01:29:14 +0100 Subject: [PATCH] quick and dirty solution for download/audio bug Youtube has changed (again) this time it was a bit harder, therefore the changes are deeper. May still be a bit unstable. --- .../youtube/YoutubeSearchEngineTest.java | 1 + ...=> YoutubeStreamExtractorDefaultTest.java} | 24 +- ...va => YoutubeStreamExtractorGemaTest.java} | 10 +- .../newpipe/VideoItemDetailActivity.java | 2 +- .../newpipe/VideoItemDetailFragment.java | 16 +- .../schabi/newpipe/crawler/DashMpdParser.java | 5 +- .../schabi/newpipe/crawler/MediaFormat.java | 2 + .../crawler/{RegexHelper.java => Parser.java} | 17 +- ...deoExtractor.java => StreamExtractor.java} | 5 +- .../newpipe/crawler/StreamingService.java | 2 +- .../org/schabi/newpipe/crawler/VideoInfo.java | 50 ++- .../services/youtube/YoutubeSearchEngine.java | 2 - .../services/youtube/YoutubeService.java | 6 +- ...actor.java => YoutubeStreamExtractor.java} | 292 ++++++++++++------ .../youtube/YoutubeVideoUrlIdHandler.java | 4 +- 15 files changed, 308 insertions(+), 130 deletions(-) rename app/src/androidTest/java/org/schabi/newpipe/services/youtube/{YoutubeVideoExtractorDefaultTest.java => YoutubeStreamExtractorDefaultTest.java} (78%) rename app/src/androidTest/java/org/schabi/newpipe/services/youtube/{YoutubeVideoExtractorGemaTest.java => YoutubeStreamExtractorGemaTest.java} (79%) rename app/src/main/java/org/schabi/newpipe/crawler/{RegexHelper.java => Parser.java} (74%) rename app/src/main/java/org/schabi/newpipe/crawler/{VideoExtractor.java => StreamExtractor.java} (94%) rename app/src/main/java/org/schabi/newpipe/crawler/services/youtube/{YoutubeVideoExtractor.java => YoutubeStreamExtractor.java} (67%) diff --git a/app/src/androidTest/java/org/schabi/newpipe/services/youtube/YoutubeSearchEngineTest.java b/app/src/androidTest/java/org/schabi/newpipe/services/youtube/YoutubeSearchEngineTest.java 
index d81c4b431..dfd9fef23 100644 --- a/app/src/androidTest/java/org/schabi/newpipe/services/youtube/YoutubeSearchEngineTest.java +++ b/app/src/androidTest/java/org/schabi/newpipe/services/youtube/YoutubeSearchEngineTest.java @@ -37,6 +37,7 @@ public class YoutubeSearchEngineTest extends AndroidTestCase { public void setUp() throws Exception{ super.setUp(); SearchEngine engine = new YoutubeSearchEngine(); + result = engine.search("https://www.youtube.com/results?search_query=bla", 0, "de", new Downloader()); suggestionReply = engine.suggestionList("hello", new Downloader()); diff --git a/app/src/androidTest/java/org/schabi/newpipe/services/youtube/YoutubeVideoExtractorDefaultTest.java b/app/src/androidTest/java/org/schabi/newpipe/services/youtube/YoutubeStreamExtractorDefaultTest.java similarity index 78% rename from app/src/androidTest/java/org/schabi/newpipe/services/youtube/YoutubeVideoExtractorDefaultTest.java rename to app/src/androidTest/java/org/schabi/newpipe/services/youtube/YoutubeStreamExtractorDefaultTest.java index 19246b54c..2bf6dbf9d 100644 --- a/app/src/androidTest/java/org/schabi/newpipe/services/youtube/YoutubeVideoExtractorDefaultTest.java +++ b/app/src/androidTest/java/org/schabi/newpipe/services/youtube/YoutubeStreamExtractorDefaultTest.java @@ -1,12 +1,11 @@ package org.schabi.newpipe.services.youtube; import android.test.AndroidTestCase; -import android.util.Log; import org.schabi.newpipe.Downloader; import org.schabi.newpipe.crawler.CrawlingException; import org.schabi.newpipe.crawler.ParsingException; -import org.schabi.newpipe.crawler.services.youtube.YoutubeVideoExtractor; +import org.schabi.newpipe.crawler.services.youtube.YoutubeStreamExtractor; import org.schabi.newpipe.crawler.VideoInfo; import java.io.IOException; @@ -31,11 +30,15 @@ import java.io.IOException; * along with NewPipe. If not, see . 
*/ -public class YoutubeVideoExtractorDefaultTest extends AndroidTestCase { - private YoutubeVideoExtractor extractor; +public class YoutubeStreamExtractorDefaultTest extends AndroidTestCase { + private YoutubeStreamExtractor extractor; public void setUp() throws IOException, CrawlingException { - extractor = new YoutubeVideoExtractor("https://www.youtube.com/watch?v=FmG385_uUys", + /* some anonymus video test + extractor = new YoutubeStreamExtractor("https://www.youtube.com/watch?v=FmG385_uUys", + new Downloader()); */ + /* some vevo video (suggested to test against) */ + extractor = new YoutubeStreamExtractor("https://www.youtube.com/watch?v=YQHsXMglC9A", new Downloader()); } @@ -45,8 +48,8 @@ public class YoutubeVideoExtractorDefaultTest extends AndroidTestCase { } public void testGetValidTimeStamp() throws CrawlingException, IOException { - YoutubeVideoExtractor extractor = - new YoutubeVideoExtractor("https://youtu.be/FmG385_uUys?t=174", new Downloader()); + YoutubeStreamExtractor extractor = + new YoutubeStreamExtractor("https://youtu.be/FmG385_uUys?t=174", new Downloader()); assertTrue(Integer.toString(extractor.getTimeStamp()), extractor.getTimeStamp() == 174); } @@ -86,7 +89,7 @@ public class YoutubeVideoExtractorDefaultTest extends AndroidTestCase { } public void testGetAudioStreams() throws ParsingException { - assertTrue(extractor.getAudioStreams() == null); + assertTrue(!extractor.getAudioStreams().isEmpty()); } public void testGetVideoStreams() throws ParsingException { @@ -98,4 +101,9 @@ public class YoutubeVideoExtractorDefaultTest extends AndroidTestCase { 0 <= s.format && s.format <= 4); } } + + public void testGetDashMpd() throws ParsingException { + assertTrue(extractor.getDashMpdUrl(), + !extractor.getDashMpdUrl().isEmpty()); + } } diff --git a/app/src/androidTest/java/org/schabi/newpipe/services/youtube/YoutubeVideoExtractorGemaTest.java b/app/src/androidTest/java/org/schabi/newpipe/services/youtube/YoutubeStreamExtractorGemaTest.java 
similarity index 79% rename from app/src/androidTest/java/org/schabi/newpipe/services/youtube/YoutubeVideoExtractorGemaTest.java rename to app/src/androidTest/java/org/schabi/newpipe/services/youtube/YoutubeStreamExtractorGemaTest.java index 8125ef5e3..9d3bf376a 100644 --- a/app/src/androidTest/java/org/schabi/newpipe/services/youtube/YoutubeVideoExtractorGemaTest.java +++ b/app/src/androidTest/java/org/schabi/newpipe/services/youtube/YoutubeStreamExtractorGemaTest.java @@ -4,9 +4,7 @@ import android.test.AndroidTestCase; import org.schabi.newpipe.Downloader; import org.schabi.newpipe.crawler.CrawlingException; -import org.schabi.newpipe.crawler.services.youtube.YoutubeVideoExtractor; -import org.schabi.newpipe.crawler.VideoInfo; -import org.schabi.newpipe.Downloader; +import org.schabi.newpipe.crawler.services.youtube.YoutubeStreamExtractor; import java.io.IOException; @@ -32,7 +30,7 @@ import java.io.IOException; // This class only works in Germany. -public class YoutubeVideoExtractorGemaTest extends AndroidTestCase { +public class YoutubeStreamExtractorGemaTest extends AndroidTestCase { // Deaktivate this Test Case bevore uploading it githup, otherwise CI will fail. 
private static final boolean testActive = false; @@ -40,10 +38,10 @@ public class YoutubeVideoExtractorGemaTest extends AndroidTestCase { public void testGemaError() throws IOException, CrawlingException { if(testActive) { try { - new YoutubeVideoExtractor("https://www.youtube.com/watch?v=3O1_3zBUKM8", + new YoutubeStreamExtractor("https://www.youtube.com/watch?v=3O1_3zBUKM8", new Downloader()); assertTrue("Gema exception not thrown", false); - } catch(YoutubeVideoExtractor.GemaException ge) { + } catch(YoutubeStreamExtractor.GemaException ge) { assertTrue(true); } } diff --git a/app/src/main/java/org/schabi/newpipe/VideoItemDetailActivity.java b/app/src/main/java/org/schabi/newpipe/VideoItemDetailActivity.java index 3be69fbd6..21b5bf326 100644 --- a/app/src/main/java/org/schabi/newpipe/VideoItemDetailActivity.java +++ b/app/src/main/java/org/schabi/newpipe/VideoItemDetailActivity.java @@ -71,7 +71,7 @@ public class VideoItemDetailActivity extends AppCompatActivity { if (getIntent().getData() != null) { videoUrl = getIntent().getData().toString(); StreamingService[] serviceList = ServiceList.getServices(); - //VideoExtractor videoExtractor = null; + //StreamExtractor videoExtractor = null; for (int i = 0; i < serviceList.length; i++) { if (serviceList[i].getUrlIdHandler().acceptUrl(videoUrl)) { arguments.putInt(VideoItemDetailFragment.STREAMING_SERVICE, i); diff --git a/app/src/main/java/org/schabi/newpipe/VideoItemDetailFragment.java b/app/src/main/java/org/schabi/newpipe/VideoItemDetailFragment.java index 2793e0a98..c1437376b 100644 --- a/app/src/main/java/org/schabi/newpipe/VideoItemDetailFragment.java +++ b/app/src/main/java/org/schabi/newpipe/VideoItemDetailFragment.java @@ -49,11 +49,11 @@ import java.util.Vector; import org.schabi.newpipe.crawler.MediaFormat; import org.schabi.newpipe.crawler.ParsingException; import org.schabi.newpipe.crawler.ServiceList; +import org.schabi.newpipe.crawler.StreamExtractor; import org.schabi.newpipe.crawler.VideoPreviewInfo; 
-import org.schabi.newpipe.crawler.VideoExtractor; import org.schabi.newpipe.crawler.StreamingService; import org.schabi.newpipe.crawler.VideoInfo; -import org.schabi.newpipe.crawler.services.youtube.YoutubeVideoExtractor; +import org.schabi.newpipe.crawler.services.youtube.YoutubeStreamExtractor; /** @@ -115,7 +115,7 @@ public class VideoItemDetailFragment extends Fragment { private class VideoExtractorRunnable implements Runnable { private final Handler h = new Handler(); - private VideoExtractor videoExtractor; + private StreamExtractor streamExtractor; private final StreamingService service; private final String videoUrl; @@ -127,8 +127,8 @@ public class VideoItemDetailFragment extends Fragment { @Override public void run() { try { - videoExtractor = service.getExtractorInstance(videoUrl, new Downloader()); - VideoInfo videoInfo = VideoInfo.getVideoInfo(videoExtractor, new Downloader()); + streamExtractor = service.getExtractorInstance(videoUrl, new Downloader()); + VideoInfo videoInfo = VideoInfo.getVideoInfo(streamExtractor, new Downloader()); h.post(new VideoResultReturnedRunnable(videoInfo)); } catch (IOException e) { @@ -136,10 +136,10 @@ public class VideoItemDetailFragment extends Fragment { e.printStackTrace(); } // custom service related exceptions - catch (YoutubeVideoExtractor.DecryptException de) { + catch (YoutubeStreamExtractor.DecryptException de) { postNewErrorToast(h, R.string.youtube_signature_decryption_error); de.printStackTrace(); - } catch (YoutubeVideoExtractor.GemaException ge) { + } catch (YoutubeStreamExtractor.GemaException ge) { h.post(new Runnable() { @Override public void run() { @@ -148,7 +148,7 @@ public class VideoItemDetailFragment extends Fragment { }); } // ---------------------------------------- - catch(VideoExtractor.ContentNotAvailableException e) { + catch(StreamExtractor.ContentNotAvailableException e) { h.post(new Runnable() { @Override public void run() { diff --git 
a/app/src/main/java/org/schabi/newpipe/crawler/DashMpdParser.java b/app/src/main/java/org/schabi/newpipe/crawler/DashMpdParser.java index 027cc66a5..7758a24ee 100644 --- a/app/src/main/java/org/schabi/newpipe/crawler/DashMpdParser.java +++ b/app/src/main/java/org/schabi/newpipe/crawler/DashMpdParser.java @@ -74,6 +74,7 @@ public class DashMpdParser { break; case XmlPullParser.TEXT: + // actual stream tag if(currentTagIsBaseUrl && (currentMimeType.contains("audio"))) { int format = -1; @@ -86,12 +87,14 @@ public class DashMpdParser { format, currentBandwidth, currentSamplingRate)); } break; + case XmlPullParser.END_TAG: if(tagName.equals("AdaptationSet")) { currentMimeType = ""; } else if(tagName.equals("BaseURL")) { currentTagIsBaseUrl = false; - }//no break needed here + } + break; } } } catch(Exception e) { diff --git a/app/src/main/java/org/schabi/newpipe/crawler/MediaFormat.java b/app/src/main/java/org/schabi/newpipe/crawler/MediaFormat.java index 63b94fd47..938c6310b 100644 --- a/app/src/main/java/org/schabi/newpipe/crawler/MediaFormat.java +++ b/app/src/main/java/org/schabi/newpipe/crawler/MediaFormat.java @@ -25,10 +25,12 @@ package org.schabi.newpipe.crawler; /**Static data about various media formats support by Newpipe, eg mime type, extension*/ public enum MediaFormat { + //video and audio combined formats // id name suffix mime type MPEG_4 (0x0, "MPEG-4", "mp4", "video/mp4"), v3GPP (0x1, "3GPP", "3gp", "video/3gpp"), WEBM (0x2, "WebM", "webm", "video/webm"), + // audio formats M4A (0x3, "m4a", "m4a", "audio/mp4"), WEBMA (0x4, "WebM", "webm", "audio/webm"); diff --git a/app/src/main/java/org/schabi/newpipe/crawler/RegexHelper.java b/app/src/main/java/org/schabi/newpipe/crawler/Parser.java similarity index 74% rename from app/src/main/java/org/schabi/newpipe/crawler/RegexHelper.java rename to app/src/main/java/org/schabi/newpipe/crawler/Parser.java index a82386182..56eec5c62 100644 --- a/app/src/main/java/org/schabi/newpipe/crawler/RegexHelper.java +++ 
b/app/src/main/java/org/schabi/newpipe/crawler/Parser.java @@ -1,5 +1,9 @@ package org.schabi.newpipe.crawler; +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; +import java.util.HashMap; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -7,7 +11,7 @@ import java.util.regex.Pattern; * Created by Christian Schabesberger on 02.02.16. * * Copyright (C) Christian Schabesberger 2016 - * RegexHelper.java is part of NewPipe. + * Parser.java is part of NewPipe. * * NewPipe is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,7 +28,7 @@ import java.util.regex.Pattern; */ /** avoid using regex !!! */ -public class RegexHelper { +public class Parser { public static class RegexException extends ParsingException { public RegexException(String message) { @@ -44,4 +48,13 @@ public class RegexHelper { throw new RegexException("failed to find pattern \""+pattern+" inside of "+input+"\""); } } + + public static Map compatParseMap(final String input) throws UnsupportedEncodingException { + Map map = new HashMap<>(); + for(String arg : input.split("&")) { + String[] split_arg = arg.split("="); + map.put(split_arg[0], URLDecoder.decode(split_arg[1], "UTF-8")); + } + return map; + } } diff --git a/app/src/main/java/org/schabi/newpipe/crawler/VideoExtractor.java b/app/src/main/java/org/schabi/newpipe/crawler/StreamExtractor.java similarity index 94% rename from app/src/main/java/org/schabi/newpipe/crawler/VideoExtractor.java rename to app/src/main/java/org/schabi/newpipe/crawler/StreamExtractor.java index 44b4e743d..a363269cb 100644 --- a/app/src/main/java/org/schabi/newpipe/crawler/VideoExtractor.java +++ b/app/src/main/java/org/schabi/newpipe/crawler/StreamExtractor.java @@ -4,7 +4,7 @@ package org.schabi.newpipe.crawler; * Created by Christian Schabesberger on 10.08.15. 
* * Copyright (C) Christian Schabesberger 2015 - * VideoExtractor.java is part of NewPipe. + * StreamExtractor.java is part of NewPipe. * * NewPipe is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,7 +26,7 @@ import java.util.List; @SuppressWarnings("ALL") -public interface VideoExtractor { +public interface StreamExtractor { public class ExctractorInitException extends CrawlingException { public ExctractorInitException() {} @@ -65,6 +65,7 @@ public interface VideoExtractor { public abstract String getUploaderThumbnailUrl() throws ParsingException; public abstract List getAudioStreams() throws ParsingException; public abstract List getVideoStreams() throws ParsingException; + public abstract List getVideoOnlyStreams() throws ParsingException; public abstract String getDashMpdUrl() throws ParsingException; public abstract int getAgeLimit() throws ParsingException; public abstract String getAverageRating() throws ParsingException; diff --git a/app/src/main/java/org/schabi/newpipe/crawler/StreamingService.java b/app/src/main/java/org/schabi/newpipe/crawler/StreamingService.java index 9b6f4e285..6c87a5c7b 100644 --- a/app/src/main/java/org/schabi/newpipe/crawler/StreamingService.java +++ b/app/src/main/java/org/schabi/newpipe/crawler/StreamingService.java @@ -27,7 +27,7 @@ public interface StreamingService { public String name = ""; } ServiceInfo getServiceInfo(); - VideoExtractor getExtractorInstance(String url, Downloader downloader) + StreamExtractor getExtractorInstance(String url, Downloader downloader) throws IOException, CrawlingException; SearchEngine getSearchEngineInstance(); diff --git a/app/src/main/java/org/schabi/newpipe/crawler/VideoInfo.java b/app/src/main/java/org/schabi/newpipe/crawler/VideoInfo.java index ecc691efc..29bbf24cc 100644 --- a/app/src/main/java/org/schabi/newpipe/crawler/VideoInfo.java +++ b/app/src/main/java/org/schabi/newpipe/crawler/VideoInfo.java @@ 
-30,7 +30,7 @@ public class VideoInfo extends AbstractVideoInfo { /**Fills out the video info fields which are common to all services. * Probably needs to be overridden by subclasses*/ - public static VideoInfo getVideoInfo(VideoExtractor extractor, Downloader downloader) + public static VideoInfo getVideoInfo(StreamExtractor extractor, Downloader downloader) throws CrawlingException, IOException { VideoInfo videoInfo = new VideoInfo(); @@ -46,18 +46,34 @@ public class VideoInfo extends AbstractVideoInfo { videoInfo.upload_date = extractor.getUploadDate(); videoInfo.thumbnail_url = extractor.getThumbnailUrl(); videoInfo.id = uiconv.getVideoId(extractor.getPageUrl()); - videoInfo.dashMpdUrl = extractor.getDashMpdUrl(); + //todo: make this quick and dirty solution a real fallback + // The front end should be notified that the dash mpd could not be downloaded + // although not getting the dash mpd is not the end of the world, therfore + // we continue. + try { + videoInfo.dashMpdUrl = extractor.getDashMpdUrl(); + } catch(Exception e) { + e.printStackTrace(); + } /** Load and extract audio*/ videoInfo.audio_streams = extractor.getAudioStreams(); if(videoInfo.dashMpdUrl != null && !videoInfo.dashMpdUrl.isEmpty()) { if(videoInfo.audio_streams == null) { videoInfo.audio_streams = new Vector(); } - videoInfo.audio_streams.addAll( - DashMpdParser.getAudioStreams(videoInfo.dashMpdUrl, downloader)); + //todo: make this quick and dirty solution a real fallback + // same as the quick and dirty aboth + try { + videoInfo.audio_streams.addAll( + DashMpdParser.getAudioStreams(videoInfo.dashMpdUrl, downloader)); + } catch(Exception e) { + e.printStackTrace(); + } } /** Extract video stream url*/ videoInfo.video_streams = extractor.getVideoStreams(); + /** Extract video only stream url*/ + videoInfo.video_only_streams = extractor.getVideoOnlyStreams(); videoInfo.uploader_thumbnail_url = extractor.getUploaderThumbnailUrl(); videoInfo.start_position = extractor.getTimeStamp(); 
videoInfo.average_rating = extractor.getAverageRating(); @@ -78,6 +94,7 @@ public class VideoInfo extends AbstractVideoInfo { /*todo: make this lists over vectors*/ public List video_streams = null; public List audio_streams = null; + public List video_only_streams = null; // video streams provided by the dash mpd do not need to be provided as VideoStream. // Later on this will also aplly to audio streams. Since dash mpd is standarized, // crawling such a file is not service dependent. Therefore getting audio only streams by yust @@ -132,6 +149,18 @@ public class VideoInfo extends AbstractVideoInfo { public VideoStream(String url, int format, String res) { this.url = url; this.format = format; resolution = res; } + + // reveals wether two streams are the same, but have diferent urls + public boolean equalStats(VideoStream cmp) { + return format == cmp.format + && resolution == cmp.resolution; + } + + // revelas wether two streams are equal + public boolean equals(VideoStream cmp) { + return equalStats(cmp) + && url == cmp.url; + } } @SuppressWarnings("unused") @@ -145,5 +174,18 @@ public class VideoInfo extends AbstractVideoInfo { this.url = url; this.format = format; this.bandwidth = bandwidth; this.sampling_rate = samplingRate; } + + // reveals wether two streams are the same, but have diferent urls + public boolean equalStats(AudioStream cmp) { + return format == cmp.format + && bandwidth == cmp.bandwidth + && sampling_rate == cmp.sampling_rate; + } + + // revelas wether two streams are equal + public boolean equals(AudioStream cmp) { + return equalStats(cmp) + && url == cmp.url; + } } } \ No newline at end of file diff --git a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeSearchEngine.java b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeSearchEngine.java index dca9caa26..a5a547706 100644 --- a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeSearchEngine.java +++ 
b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeSearchEngine.java @@ -6,11 +6,9 @@ import android.util.Log; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import org.schabi.newpipe.crawler.CrawlingException; import org.schabi.newpipe.crawler.Downloader; import org.schabi.newpipe.crawler.ParsingException; import org.schabi.newpipe.crawler.SearchEngine; -import org.schabi.newpipe.crawler.VideoExtractor; import org.schabi.newpipe.crawler.VideoPreviewInfo; import org.w3c.dom.Node; import org.w3c.dom.NodeList; diff --git a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeService.java b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeService.java index b49c55b87..43f673c63 100644 --- a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeService.java +++ b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeService.java @@ -2,9 +2,9 @@ package org.schabi.newpipe.crawler.services.youtube; import org.schabi.newpipe.crawler.CrawlingException; import org.schabi.newpipe.crawler.Downloader; +import org.schabi.newpipe.crawler.StreamExtractor; import org.schabi.newpipe.crawler.StreamingService; import org.schabi.newpipe.crawler.VideoUrlIdHandler; -import org.schabi.newpipe.crawler.VideoExtractor; import org.schabi.newpipe.crawler.SearchEngine; import java.io.IOException; @@ -38,11 +38,11 @@ public class YoutubeService implements StreamingService { return serviceInfo; } @Override - public VideoExtractor getExtractorInstance(String url, Downloader downloader) + public StreamExtractor getExtractorInstance(String url, Downloader downloader) throws CrawlingException, IOException { VideoUrlIdHandler urlIdHandler = new YoutubeVideoUrlIdHandler(); if(urlIdHandler.acceptUrl(url)) { - return new YoutubeVideoExtractor(url, downloader) ; + return new YoutubeStreamExtractor(url, downloader) ; } else { throw new IllegalArgumentException("supplied String 
is not a valid Youtube URL"); diff --git a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeVideoExtractor.java b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeStreamExtractor.java similarity index 67% rename from app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeVideoExtractor.java rename to app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeStreamExtractor.java index 07d2b23a7..499a942fd 100644 --- a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeVideoExtractor.java +++ b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeStreamExtractor.java @@ -1,5 +1,6 @@ package org.schabi.newpipe.crawler.services.youtube; +import android.provider.MediaStore; import android.util.Log; import org.json.JSONException; @@ -7,16 +8,15 @@ import org.json.JSONObject; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import org.jsoup.parser.Parser; import org.mozilla.javascript.Context; import org.mozilla.javascript.Function; import org.mozilla.javascript.ScriptableObject; import org.schabi.newpipe.crawler.CrawlingException; import org.schabi.newpipe.crawler.Downloader; +import org.schabi.newpipe.crawler.Parser; import org.schabi.newpipe.crawler.ParsingException; -import org.schabi.newpipe.crawler.RegexHelper; import org.schabi.newpipe.crawler.VideoUrlIdHandler; -import org.schabi.newpipe.crawler.VideoExtractor; +import org.schabi.newpipe.crawler.StreamExtractor; import org.schabi.newpipe.crawler.MediaFormat; import org.schabi.newpipe.crawler.VideoInfo; import org.schabi.newpipe.crawler.VideoPreviewInfo; @@ -32,7 +32,7 @@ import java.util.Vector; * Created by Christian Schabesberger on 06.08.15. * * Copyright (C) Christian Schabesberger 2015 - * YoutubeVideoExtractor.java is part of NewPipe. + * YoutubeStreamExtractor.java is part of NewPipe. 
* * NewPipe is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -48,7 +48,98 @@ import java.util.Vector; * along with NewPipe. If not, see . */ -public class YoutubeVideoExtractor implements VideoExtractor { +public class YoutubeStreamExtractor implements StreamExtractor { + + public enum ItagType { + AUDIO, + VIDEO, + VIDEO_ONLY + } + + private static class ItagItem { + public ItagItem(int id, ItagType type, MediaFormat format, String res, int fps) { + this.id = id; + this.itagType = type; + this.mediaFormatId = format.id; + this.resolutionString = res; + this.fps = fps; + } + public ItagItem(int id, ItagType type, MediaFormat format, int samplingRate, int bandWidth) { + this.id = id; + this.itagType = type; + this.mediaFormatId = format.id; + this.samplingRate = samplingRate; + this.bandWidth = bandWidth; + } + public int id; + public ItagType itagType; + public int mediaFormatId; + public String resolutionString = null; + public int fps = -1; + public int samplingRate = -1; + public int bandWidth = -1; + } + + private static final ItagItem[] itagList = { + // video streams + // id, ItagType, MediaFormat, Resolution, fps + new ItagItem(17, ItagType.VIDEO, MediaFormat.v3GPP, "144p", 12), + new ItagItem(18, ItagType.VIDEO, MediaFormat.MPEG_4, "360p", 24), + new ItagItem(22, ItagType.VIDEO, MediaFormat.MPEG_4, "720p", 24), + new ItagItem(36, ItagType.VIDEO, MediaFormat.v3GPP, "240p", 24), + new ItagItem(37, ItagType.VIDEO, MediaFormat.MPEG_4, "1080p", 24), + new ItagItem(38, ItagType.VIDEO, MediaFormat.MPEG_4, "1080p", 24), + new ItagItem(43, ItagType.VIDEO, MediaFormat.WEBM, "360p", 24), + new ItagItem(44, ItagType.VIDEO, MediaFormat.WEBM, "480p", 24), + new ItagItem(45, ItagType.VIDEO, MediaFormat.WEBM, "720p", 24), + new ItagItem(46, ItagType.VIDEO, MediaFormat.WEBM, "1080p", 24), + // audio streams + // id, ItagType, MediaFormat, samplingR, bandwidth + new ItagItem(249, ItagType.AUDIO, 
MediaFormat.WEBMA, 0, 0), // bandwith/samplingR 0 because not known + new ItagItem(250, ItagType.AUDIO, MediaFormat.WEBMA, 0, 0), + new ItagItem(171, ItagType.AUDIO, MediaFormat.WEBMA, 0, 0), + new ItagItem(140, ItagType.AUDIO, MediaFormat.M4A, 0, 0), + new ItagItem(251, ItagType.AUDIO, MediaFormat.WEBMA, 0, 0), + // video only streams + new ItagItem(160, ItagType.VIDEO_ONLY, MediaFormat.MPEG_4, "144p", 24), + new ItagItem(133, ItagType.VIDEO_ONLY, MediaFormat.MPEG_4, "240p", 24), + new ItagItem(134, ItagType.VIDEO_ONLY, MediaFormat.MPEG_4, "360p", 24), + new ItagItem(135, ItagType.VIDEO_ONLY, MediaFormat.MPEG_4, "480p", 24), + new ItagItem(136, ItagType.VIDEO_ONLY, MediaFormat.MPEG_4, "720p", 24), + new ItagItem(137, ItagType.VIDEO_ONLY, MediaFormat.MPEG_4, "1080p", 24), + }; + + /**These lists only contain itag formats that are supported by the common Android Video player. + However if you are looking for a list showing all itag formats, look at + https://github.com/rg3/youtube-dl/issues/1687 */ + + public static boolean itagIsSupported(int itag) { + for(ItagItem item : itagList) { + if(itag == item.id) { + return true; + } + } + return false; + } + + public static ItagItem getItagItem(int itag) throws ParsingException { + for(ItagItem item : itagList) { + if(itag == item.id) { + return item; + } + } + throw new ParsingException("itag=" + Integer.toString(itag) + " not supported"); + } + + // Sometimes if the html page of youtube is already downloaded, youtube web page will internally + // download the /get_video_info page. Since a certain date dashmpd url is only available over + // this /get_video_info page, so we always need to download this one to. 
+ // %%video_id%% will be replaced by the actual video id + // $$el_type$$ will be replaced by the actual el_type (se the declarations below) + private static final String GET_VIDEO_INFO_URL = + "https://www.youtube.com/get_video_info?video_id=%%video_id%%$$el_type$$&ps=default&eurl=&gl=US&hl=en"; + // eltype is nececeary for the url aboth + private static final String EL_INFO = "el=info"; public class DecryptException extends ParsingException { DecryptException(Throwable cause) { @@ -69,9 +160,10 @@ public class YoutubeVideoExtractor implements VideoExtractor { // ---------------- - private static final String TAG = YoutubeVideoExtractor.class.toString(); + private static final String TAG = YoutubeStreamExtractor.class.toString(); private final Document doc; private JSONObject playerArgs; + private Map videoInfoPage; // static values private static final String DECRYPTION_FUNC_NAME="decrypt"; @@ -84,7 +176,7 @@ public class YoutubeVideoExtractor implements VideoExtractor { private Downloader downloader; - public YoutubeVideoExtractor(String pageUrl, Downloader dl) throws CrawlingException, IOException { + public YoutubeStreamExtractor(String pageUrl, Downloader dl) throws CrawlingException, IOException { //most common videoInfo fields are now set in our superclass, for all services downloader = dl; this.pageUrl = pageUrl; @@ -96,10 +188,10 @@ public class YoutubeVideoExtractor implements VideoExtractor { //attempt to load the youtube js player JSON arguments try { ytPlayerConfigRaw = - RegexHelper.matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", pageContent); + Parser.matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", pageContent); ytPlayerConfig = new JSONObject(ytPlayerConfigRaw); playerArgs = ytPlayerConfig.getJSONObject("args"); - } catch (RegexHelper.RegexException e) { + } catch (Parser.RegexException e) { String errorReason = findErrorReason(doc); switch(errorReason) { case "GEMA": @@ -113,6 +205,17 @@ public class YoutubeVideoExtractor implements 
VideoExtractor { throw new ParsingException("Could not parse yt player config"); } + + // get videoInfo page + try { + //Parser.unescapeEntities(url_data_str, true).split("&") + String getVideoInfoUrl = GET_VIDEO_INFO_URL.replace("%%video_id%%", + urlidhandler.getVideoId(pageUrl)).replace("$$el_type$$", "&" + EL_INFO); + videoInfoPage = Parser.compatParseMap(downloader.download(getVideoInfoUrl)); + } catch(Exception e) { + throw new ParsingException("Could not load video info page.", e); + } + //---------------------------------- // load and parse description code, if it isn't already initialised //---------------------------------- @@ -211,7 +314,8 @@ public class YoutubeVideoExtractor implements VideoExtractor { return doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href"); } catch(Exception e) { Log.w(TAG, "Could not find high res Thumbnail. Using low res instead"); - } try { //fall through to fallback + } + try { //fall through to fallback return playerArgs.getString("thumbnail_url"); } catch (JSONException je) { throw new ParsingException( @@ -232,10 +336,11 @@ public class YoutubeVideoExtractor implements VideoExtractor { @Override public String getDashMpdUrl() throws ParsingException { + /* try { String dashManifest = playerArgs.getString("dashmpd"); if(!dashManifest.contains("/signature/")) { - String encryptedSig = RegexHelper.matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifest); + String encryptedSig = Parser.matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifest); String decryptedSig; decryptedSig = decryptSignature(encryptedSig, decryptionCode); @@ -243,47 +348,95 @@ public class YoutubeVideoExtractor implements VideoExtractor { } return dashManifest; - } catch(NullPointerException e) { + } catch(JSONException je) { throw new ParsingException( - "Could not find \"dashmpd\" upon the player args (maybe no dash manifest available).", e); + "Could not find \"dashmpd\" upon the player args (maybe no dash manifest available).", je); } catch (Exception 
e) { throw new ParsingException(e); } + */ + try { + String dashManifestUrl = videoInfoPage.get("dashmpd"); + if(!dashManifestUrl.contains("/signature/")) { + String encryptedSig = Parser.matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifestUrl); + String decryptedSig; + + decryptedSig = decryptSignature(encryptedSig, decryptionCode); + dashManifestUrl = dashManifestUrl.replace("/s/" + encryptedSig, "/signature/" + decryptedSig); + } + return dashManifestUrl; + } catch (Exception e) { + throw new ParsingException( + "Could not get \"dashmpd\" maybe VideoInfoPage is broken.", e); + } } + @Override public List getAudioStreams() throws ParsingException { - /* If we provide a valid dash manifest, we don't need to provide audio streams extra */ - return null; + Vector audioStreams = new Vector<>(); + try{ + String encoded_url_map = playerArgs.getString("adaptive_fmts"); + for(String url_data_str : encoded_url_map.split(",")) { + // This loop iterates through multiple streams, therefor tags + // is related to one and the same stream at a time. 
+ Map tags = Parser.compatParseMap( + org.jsoup.parser.Parser.unescapeEntities(url_data_str, true)); + + int itag = Integer.parseInt(tags.get("itag")); + + if (itagIsSupported(itag)) { + ItagItem itagItem = getItagItem(itag); + if (itagItem.itagType == ItagType.AUDIO) { + String streamUrl = tags.get("url"); + // if video has a signature: decrypt it and add it to the url + if (tags.get("s") != null) { + streamUrl = streamUrl + "&signature=" + + decryptSignature(tags.get("s"), decryptionCode); + } + + audioStreams.add(new VideoInfo.AudioStream(streamUrl, + itagItem.mediaFormatId, + itagItem.bandWidth, + itagItem.samplingRate)); + } + } + } + } catch (Exception e) { + throw new ParsingException("Could not get audiostreams", e); + } + return audioStreams; } @Override public List getVideoStreams() throws ParsingException { Vector videoStreams = new Vector<>(); + try{ String encoded_url_map = playerArgs.getString("url_encoded_fmt_stream_map"); for(String url_data_str : encoded_url_map.split(",")) { try { - Map tags = new HashMap<>(); - for (String raw_tag : Parser.unescapeEntities(url_data_str, true).split("&")) { - String[] split_tag = raw_tag.split("="); - tags.put(split_tag[0], split_tag[1]); - } + // This loop iterates through multiple streams, therefor tags + // is related to one and the same stream at a time. 
+ Map tags = Parser.compatParseMap( + org.jsoup.parser.Parser.unescapeEntities(url_data_str, true)); int itag = Integer.parseInt(tags.get("itag")); - String streamUrl = URLDecoder.decode(tags.get("url"), "UTF-8"); - // if video has a signature: decrypt it and add it to the url - if (tags.get("s") != null) { - streamUrl = streamUrl + "&signature=" - + decryptSignature(tags.get("s"), decryptionCode); - } - - if (resolveFormat(itag) != -1) { - videoStreams.add(new VideoInfo.VideoStream( - streamUrl, - resolveFormat(itag), - resolveResolutionString(itag))); + if (itagIsSupported(itag)) { + ItagItem itagItem = getItagItem(itag); + if(itagItem.itagType == ItagType.VIDEO) { + String streamUrl = tags.get("url"); + // if video has a signature: decrypt it and add it to the url + if (tags.get("s") != null) { + streamUrl = streamUrl + "&signature=" + + decryptSignature(tags.get("s"), decryptionCode); + } + videoStreams.add(new VideoInfo.VideoStream( + streamUrl, + itagItem.mediaFormatId, + itagItem.resolutionString)); + } } } catch (Exception e) { Log.w(TAG, "Could not get Video stream."); @@ -298,19 +451,23 @@ public class YoutubeVideoExtractor implements VideoExtractor { if(videoStreams.isEmpty()) { throw new ParsingException("Failed to get any video stream"); } - return videoStreams; } + @Override + public List getVideoOnlyStreams() throws ParsingException { + return null; + } + /**Attempts to parse (and return) the offset to start playing the video from. 
* @return the offset (in seconds), or 0 if no timestamp is found.*/ @Override public int getTimeStamp() throws ParsingException { - //todo: add unit test for timestamp + //todo: use video_info for getting timestamp String timeStamp; try { - timeStamp = RegexHelper.matchGroup1("((#|&|\\?)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)", pageUrl); - } catch (RegexHelper.RegexException e) { + timeStamp = Parser.matchGroup1("((#|&|\\?)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)", pageUrl); + } catch (Parser.RegexException e) { // catch this instantly since an url does not necessarily have to have a time stamp // -2 because well the testing system will then know its the regex that failed :/ @@ -318,22 +475,21 @@ public class YoutubeVideoExtractor implements VideoExtractor { return -2; } - //TODO: test this if(!timeStamp.isEmpty()) { try { String secondsString = ""; String minutesString = ""; String hoursString = ""; try { - secondsString = RegexHelper.matchGroup1("(\\d{1,3})s", timeStamp); - minutesString = RegexHelper.matchGroup1("(\\d{1,3})m", timeStamp); - hoursString = RegexHelper.matchGroup1("(\\d{1,3})h", timeStamp); + secondsString = Parser.matchGroup1("(\\d{1,3})s", timeStamp); + minutesString = Parser.matchGroup1("(\\d{1,3})m", timeStamp); + hoursString = Parser.matchGroup1("(\\d{1,3})h", timeStamp); } catch (Exception e) { //it could be that time is given in another method if (secondsString.isEmpty() //if nothing was got, && minutesString.isEmpty()//treat as unlabelled seconds && hoursString.isEmpty()) { - secondsString = RegexHelper.matchGroup1("t=(\\d{1,3})", timeStamp); + secondsString = Parser.matchGroup1("t=(\\d{1,3})", timeStamp); } } @@ -446,7 +602,7 @@ public class YoutubeVideoExtractor implements VideoExtractor { info.webpage_url = li.select("a.content-link").first() .attr("abs:href"); - info.id = RegexHelper.matchGroup1("v=([0-9a-zA-Z-]*)", info.webpage_url); + info.id = Parser.matchGroup1("v=([0-9a-zA-Z-]*)", info.webpage_url); //todo: check NullPointerException causing 
info.title = li.select("span.title").first().text(); @@ -498,19 +654,19 @@ public class YoutubeVideoExtractor implements VideoExtractor { String playerCode = downloader.download(playerUrl); decryptionFuncName = - RegexHelper.matchGroup1("\\.sig\\|\\|([a-zA-Z0-9$]+)\\(", playerCode); + Parser.matchGroup1("\\.sig\\|\\|([a-zA-Z0-9$]+)\\(", playerCode); String functionPattern = "(" + decryptionFuncName.replace("$", "\\$") + "=function\\([a-zA-Z0-9_]*\\)\\{.+?\\})"; - decryptionFunc = "var " + RegexHelper.matchGroup1(functionPattern, playerCode) + ";"; + decryptionFunc = "var " + Parser.matchGroup1(functionPattern, playerCode) + ";"; - helperObjectName = RegexHelper + helperObjectName = Parser .matchGroup1(";([A-Za-z0-9_\\$]{2})\\...\\(", decryptionFunc); String helperPattern = "(var " + helperObjectName.replace("$", "\\$") + "=\\{.+?\\}\\};)"; - helperObject = RegexHelper.matchGroup1(helperPattern, playerCode); + helperObject = Parser.matchGroup1(helperPattern, playerCode); callerFunc = callerFunc.replace("%%", decryptionFuncName); @@ -553,48 +709,4 @@ public class YoutubeVideoExtractor implements VideoExtractor { } return ""; } - - /**These lists only contain itag formats that are supported by the common Android Video player. - However if you are looking for a list showing all itag formats, look at - https://github.com/rg3/youtube-dl/issues/1687 */ - - @SuppressWarnings("WeakerAccess") - public static int resolveFormat(int itag) { - switch(itag) { - // !!! lists only supported formats !!! 
- // video - case 17: return MediaFormat.v3GPP.id; - case 18: return MediaFormat.MPEG_4.id; - case 22: return MediaFormat.MPEG_4.id; - case 36: return MediaFormat.v3GPP.id; - case 37: return MediaFormat.MPEG_4.id; - case 38: return MediaFormat.MPEG_4.id; - case 43: return MediaFormat.WEBM.id; - case 44: return MediaFormat.WEBM.id; - case 45: return MediaFormat.WEBM.id; - case 46: return MediaFormat.WEBM.id; - default: - //Log.i(TAG, "Itag " + Integer.toString(itag) + " not known or not supported."); - return -1; - } - } - - @SuppressWarnings("WeakerAccess") - public static String resolveResolutionString(int itag) { - switch(itag) { - case 17: return "144p"; - case 18: return "360p"; - case 22: return "720p"; - case 36: return "240p"; - case 37: return "1080p"; - case 38: return "1080p"; - case 43: return "360p"; - case 44: return "480p"; - case 45: return "720p"; - case 46: return "1080p"; - default: - //Log.i(TAG, "Itag " + Integer.toString(itag) + " not known or not supported."); - return null; - } - } } diff --git a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeVideoUrlIdHandler.java b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeVideoUrlIdHandler.java index b9d2b4fe8..7e1786a5b 100644 --- a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeVideoUrlIdHandler.java +++ b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeVideoUrlIdHandler.java @@ -1,7 +1,7 @@ package org.schabi.newpipe.crawler.services.youtube; +import org.schabi.newpipe.crawler.Parser; import org.schabi.newpipe.crawler.ParsingException; -import org.schabi.newpipe.crawler.RegexHelper; import org.schabi.newpipe.crawler.VideoUrlIdHandler; /** @@ -47,7 +47,7 @@ public class YoutubeVideoUrlIdHandler implements VideoUrlIdHandler { throw new ParsingException("Error no suitable url: " + url); } - id = RegexHelper.matchGroup1(pat, url); + id = Parser.matchGroup1(pat, url); if(!id.isEmpty()){ //Log.i(TAG, "string 
\""+url+"\" matches!"); return id;